Mercurial > repos > bgruening > column_arrange_by_header
changeset 0:b84c81b7d762 draft
Uploaded
author | bgruening |
---|---|
date | Mon, 02 Mar 2015 09:59:32 -0500 |
parents | |
children | b97f0d9a31ff |
files | columnArrange.xml column_arrange.py test-data/columnarrange_input1.tab test-data/columnarrange_input2.tab test-data/columnarrange_result1.tab test-data/columnarrange_result2.tab tool_dependencies.xml |
diffstat | 7 files changed, 124 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
    columnArrange.xml: Galaxy tool wrapper for column_arrange.py.

    Moves the columns named by the user to the left of a tabular dataset;
    every other column keeps its original relative order to the right.
-->
<tool id="bg_column_arrange_by_header" name="Column arrange" version="0.1">
    <description>by header name</description>
    <requirements>
        <requirement type="package" version="0.14.1">pandas</requirement>
    </requirements>
    <!--<code file="column_sort_tab_options.py"/>-->
    <command interpreter="python">
    <![CDATA[
        column_arrange.py
            --columns
            #for token in $rep_param_columns:
                '$token.param_column'
            #end for
            --input '$param_input'
            --output '$output'
    ]]>
    </command>
    <inputs>
        <param format="tabular" name="param_input" type="data" label="file to rearrange" />

        <repeat name="rep_param_columns" min="1" title="Specify the first few columns by name">
            <param name="param_column" type="text" size="50" value="" label="column">
                <!--
                    Header names may contain spaces and punctuation, so every
                    printable character is accepted except the single quote.
                    The command line above wraps each value in single quotes;
                    with that one character removed the shell cannot interpret
                    anything inside the value.
                -->
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="'"/>
                    </valid>
                </sanitizer>
            </param>
        </repeat>
        <!--<param name="param_columns_dummy" type="select" optional="true" multiple="true" label="Columns" dynamic_options="get_options(param_input.file_name)" help="Do not select columns here, this field just shows possible columns."/>-->
    </inputs>
    <outputs>
        <data format="tabular" name="output" />
    </outputs>
    <tests>
        <!-- Plain header names. -->
        <test>
            <param name="param_input" value="columnarrange_input1.tab"/>
            <repeat name="rep_param_columns">
                <param name="param_column" value="fname"/>
            </repeat>
            <repeat name="rep_param_columns">
                <param name="param_column" value="age"/>
            </repeat>
            <!-- The name must match the <data> element declared in <outputs>. -->
            <output name="output" file="columnarrange_result1.tab"/>
        </test>
        <!-- Header names containing a space. -->
        <test>
            <param name="param_input" value="columnarrange_input2.tab"/>
            <repeat name="rep_param_columns">
                <param name="param_column" value="first name"/>
            </repeat>
            <repeat name="rep_param_columns">
                <param name="param_column" value="nationality"/>
            </repeat>
            <output name="output" file="columnarrange_result2.tab"/>
        </test>
    </tests>
    <help>
**What it does**

With this tool you can specify (by naming the header) which columns need to be leftmost. The columns which are not specified will be ordered as before, right of the columns which were specified.

Input file::

    AHeader    BHeader    CHeader    DHeader
    a          b          c          d
    a          b          c          d

Specifying **CHeader** and **BHeader**, as the columns that should be leftmost, generates::

    CHeader    BHeader    AHeader    DHeader
    c          b          a          d
    c          b          a          d

    </help>
</tool>
#!/usr/bin/env python
"""column_arrange.py: move named columns of a tab-separated table to the front.

The columns given with ``--columns`` become the leftmost columns of the output,
in the order they were given.  All remaining columns follow in their original
relative order.  Cell values are copied through as text, so they are written
exactly as they were read.
"""
import argparse
import sys

import pandas as pd


def _parse_args(argv=None):
    """Parse the command line (``argv`` defaults to ``sys.argv[1:]``)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input', required=True,
                        help='Tabular Input File Name')
    parser.add_argument('-o', '--output', required=True,
                        help='Tabular Output File')
    parser.add_argument('-c', '--columns', nargs='+', required=True,
                        help='Column Headers to Sort By')
    return parser.parse_args(argv)


def _rearrange_columns(table, columns):
    """Return ``table`` with ``columns`` first and all other columns after them.

    Raises:
        ValueError: if a requested name is not a header of ``table`` or is
            requested more than once.
    """
    header = list(table.columns)

    missing = [name for name in columns if name not in header]
    if missing:
        raise ValueError('Column(s) not found in the input header: %s'
                         % ', '.join(missing))

    requested = set()
    for name in columns:
        if name in requested:
            raise ValueError('Column listed more than once: %s' % name)
        requested.add(name)

    remaining = [name for name in header if name not in requested]
    return table[list(columns) + remaining]


def main(argv=None):
    """Read the input table, reorder its columns and write the result.

    Exits with an error message on stderr when a requested column is unknown
    or duplicated.
    """
    args = _parse_args(argv)

    # Read every cell as text and disable NA detection: without this pandas
    # infers types and rewrites values ("007" -> 7, "1.50" -> 1.5, "NA" -> "").
    table = pd.read_csv(args.input, sep='\t', dtype=str, keep_default_na=False)

    try:
        sorted_table = _rearrange_columns(table, args.columns)
    except ValueError as error:
        sys.exit(str(error))

    # write without index, separated by tabs
    sorted_table.to_csv(args.output, sep='\t', index=False)


if __name__ == '__main__':
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_input1.tab Mon Mar 02 09:59:32 2015 -0500 @@ -0,0 +1,5 @@ +name fname age nationality +Woryt Heiko 22 german +Humte Alfons 49 austrian +Witz Gerald 12 french +Koulibaly Ansgard 33 nigerian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_input2.tab Mon Mar 02 09:59:32 2015 -0500 @@ -0,0 +1,5 @@ +second name age nationality first name +Woryt 22 german Heiko +Humte 49 austrian Alfons +Witz 12 french Gerald +Koulibaly 33 nigerian Ansgard
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_result1.tab Mon Mar 02 09:59:32 2015 -0500 @@ -0,0 +1,5 @@ +fname age name nationality +Heiko 22 Woryt german +Alfons 49 Humte austrian +Gerald 12 Witz french +Ansgard 33 Koulibaly nigerian
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/columnarrange_result2.tab Mon Mar 02 09:59:32 2015 -0500 @@ -0,0 +1,5 @@ +first name nationality second name age +Heiko german Woryt 22 +Alfons austrian Humte 49 +Gerald french Witz 12 +Ansgard nigerian Koulibaly 33
<?xml version="1.0"?>
<!--
    tool_dependencies.xml: declares the pandas 0.14.1 package and the
    Tool Shed repository revision that provides it.
-->
<tool_dependency>
    <package name="pandas" version="0.14.1">
        <repository
            changeset_revision="ef98e20431a7"
            name="package_pandas_0_14"
            owner="iuc"
            toolshed="https://testtoolshed.g2.bx.psu.edu" />
    </package>
</tool_dependency>