view easyjoin.xml @ 0:631dfde45073 draft default tip

First tool-shed public version
author gordon
date Tue, 09 Oct 2012 18:48:06 -0400
parents
children
line wrap: on
line source

<tool id="cshl_easyjoin" name="Join" version="0.1.1">
  <!-- Galaxy tool wrapper around the Perl script "easyjoin": joins two input
       files on a user-selected key column from each file. -->
  <description>two files</description>
  <!-- Command line template. easyjoin is run through the Perl interpreter and
       its standard output is redirected into the output dataset.
       NOTE: the value given to -t is a literal TAB character between the single
       quotes; make sure editors do not convert it to spaces.
       The filler value is wrapped in single quotes, which is why the
       empty_string_filler parameter strips apostrophes from user input. -->
  <command interpreter="perl"><![CDATA[easyjoin $jointype
    -t '	'
    $header
    -e '$empty_string_filler'
    -o auto
    $ignore_case
    -1 "$column1"
    -2 "$column2"
    "$input1" "$input2"
    > '$output'
  ]]></command>

  <inputs>
    <!-- The two datasets to join, each followed by the selector for its key
         column (the column selectors are bound to their dataset via data_ref). -->
    <param format="txt" name="input1" type="data" label="1st file" />
    <param name="column1" label="Column to use from 1st file" type="data_column" data_ref="input1" accept_default="true" />

    <param format="txt" name="input2" type="data" label="2nd File" />
    <param name="column2" label="Column to use from 2nd file" type="data_column" data_ref="input2" accept_default="true" />

    <!-- The selected option value is inserted verbatim into the command line
         as $jointype, directly after the program name. The first option is a
         single space, i.e. no extra flags are passed. -->
    <param name="jointype" type="select" label="Output lines appearing in">
      <option value=" ">BOTH 1st &amp; 2nd file.</option>
      <option value="-v 1">1st but not in 2nd file. [-v 1]</option>
      <option value="-v 2">2nd but not in 1st file. [-v 2]</option>
      <option value="-a 1">both 1st &amp; 2nd file, plus unpairable lines from 1st file. [-a 1]</option>
      <option value="-a 2">both 1st &amp; 2nd file, plus unpairable lines from 2nd file. [-a 2]</option>
      <option value="-a 1 -a 2">All Lines [-a 1 -a 2]</option>
      <option value="-v 1 -v 2">All unpairable lines [-v 1 -v 2]</option>
    </param>

    <!-- Boolean switches: when checked, the truevalue flag is added to the
         command line; when unchecked, nothing is added. -->
    <param name="header" type="boolean" checked="false" truevalue="--header" falsevalue="" label="First line is a header line" help="Use if first line contains column headers. It will not be sorted." />

    <param name="ignore_case" type="boolean" checked="false" truevalue="-i" falsevalue="" label="Ignore case" help="Sort and Join key column values regardless of upper/lower case letters." />

    <!-- Filler text passed to the -e option. The command template wraps this
         value in single quotes, so apostrophes are removed from the otherwise
         permitted printable characters to keep the quoting intact. -->
    <param name="empty_string_filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
      <sanitizer>
        <valid initial="string.printable">
          <remove value="&apos;"/>
        </valid>
      </sanitizer>
    </param>

  </inputs>
  <outputs>
    <!-- Single result dataset; the command template redirects easyjoin's
         standard output into it. Its metadata is sourced from the 1st input
         file (metadata_source). NOTE(review): format="input" presumably makes
         the output take the datatype of the input; confirm against the
         Galaxy tool schema. -->
    <data name="output"
          format="input"
          metadata_source="input1" />
  </outputs>

<help>
**What it does**

This tool joins two tabular files based on a common key column.

-----

**Example**

**First file**::

    Fruit	Color
    Apple	red
    Banana	yellow
    Orange	orange
    Melon	green

**Second File**::

    Fruit	Price
    Orange	7
    Avocado	8
    Apple	4
    Banana	3

**Joining** both files, using **key column 1**, a **header line**, the **All Lines** output option and a period as the filler value, will return::

    Fruit	Color	Price
    Apple	red	4
    Avocado	.	8
    Banana	yellow	3
    Melon	green	.
    Orange	orange	7

- Input files need not be sorted.
- The header line (**Fruit  Color  Price**) was joined and kept as first line.
- Missing values (e.g. Avocado's color, missing from the first file) are replaced with the filler value, which is a period character in this example.

-----

*easyjoin* was written by A. Gordon

</help>
</tool>