view multijoin.xml @ 0:ec66f9d90ef0 draft

initial uploaded
author bgruening
date Thu, 05 Sep 2013 04:58:21 -0400
parents
children a4ad586d1403
line wrap: on
line source

<tool id="unixtools_multijoin'_tool" name="Multi-Join" version="0.1.1">
  <!-- Tool definition that wraps the "multijoin" script (see the command section below). -->
  <!-- NOTE(review): the id above contains an apostrophe, which looks like a typo for
       "unixtools_multijoin_tool". It is left unchanged here because the id identifies
       the tool to anything that already references it; confirm before renaming. -->
  <description>(combine multiple files)</description>
  <!--
    Command line template: runs the "multijoin" script through the perl interpreter.

    Arguments, in the order they are emitted:
      * the key option, filled from the "key_column" parameter
      * the values option, filled from the "value_columns" parameter
      * the filler option, filled from the "filler" parameter
      * three boolean parameters (ignore_dups, output_header, input_header), each of
        which expands to its truevalue flag when checked and to an empty string otherwise
      * one single-quoted path per entry of the "files" repeat
    Standard output is redirected to the "output" dataset.

    User-supplied text values are wrapped in single quotes here; the sanitizers on the
    "value_columns" and "filler" parameters remove single quotes from those values.
  -->
  <command interpreter="perl">multijoin
        --key '$key_column'
        --values '$value_columns'
        --filler '$filler'
        $ignore_dups
        $output_header
        $input_header
        #for $file in $files
            '$file.filename'
        #end for
        &gt; '$output'
  </command>

  <inputs>
    <!-- Repeatable group: each entry contributes one dataset to the join. -->
    <repeat name="files" title="file to join">
      <param name="filename"
             type="data"
             format="txt"
             label="Add file"/>
    </repeat>

    <!-- Column (integer, default 1) holding the key shared by all files. -->
    <param name="key_column"
           type="integer"
           value="1"
           label="Common key column"
           help="Usually gene-ID or other common value"/>

    <!-- Free-text, comma-separated list of the columns whose values are kept. -->
    <param name="value_columns"
           type="text"
           value="2,3,4"
           label="Column with values to preserve"
           help="Enter comma-separated list of columns, e.g. 3,6,8">
      <!-- Accept every printable character except the single quote: the value is
           wrapped in single quotes on the command line. -->
      <sanitizer>
        <valid initial="string.printable">
          <remove value="&apos;"/>
        </valid>
      </sanitizer>
    </param>

    <!-- Optional flags: each one expands to its truevalue when checked, else to nothing. -->
    <param name="output_header"
           type="boolean"
           checked="false"
           truevalue="--out-header"
           falsevalue=""
           label="Add header line to the output file"
           help=""/>
    <param name="input_header"
           type="boolean"
           checked="false"
           truevalue="--in-header"
           falsevalue=""
           label="Input files contain a header line (as first line)"
           help=""/>
    <param name="ignore_dups"
           type="boolean"
           checked="false"
           truevalue="--ignore-dups"
           falsevalue=""
           label="Ignore duplicated keys"
           help="If not set, duplicated keys in the same file will cause an error."/>

    <!-- Placeholder written into fields that have no matching value (default "0"). -->
    <param name="filler"
           type="text"
           size="20"
           value="0"
           label="Value to put in unpaired (empty) fields">
      <!-- Same rule as for value_columns: printable characters minus the single quote. -->
      <sanitizer>
        <valid initial="string.printable">
          <remove value="&apos;"/>
        </valid>
      </sanitizer>
    </param>

  </inputs>
  <outputs>
    <!--
      Joined table written by the command's stdout redirect.
      The format is declared explicitly as tabular: the tool accepts a variable number
      of inputs through the "files" repeat, so there is no single input to inherit a
      format from. No metadata_source is set; the previous value ("input1") did not
      match any declared parameter, and the joined table has a different column layout
      from every individual input anyway.
    -->
    <data name="output" format="tabular" />
  </outputs>

<help>
**What it does**

This tool joins multiple tabular files based on a common key column.

-----

**Example**

To join three files on the 4th column, keeping the 7th, 8th and 9th columns:

**First file (AAA)**::

    chr4    888449    890171    FBtr0308778    0    +    266    1527    1722
    chr4    972167    979017    FBtr0310651    0    -    3944    6428    6850
    chr4    972186    979017    FBtr0089229    0    -    3944    6428    6831
    chr4    972186    979017    FBtr0089231    0    -    3944    6428    6831
    chr4    972186    979017    FBtr0089233    0    -    3944    6428    6831
    chr4    995793    996435    FBtr0111046    0    +    7    166    642
    chr4    995793    997931    FBtr0111044    0    +    28    683    2138
    chr4    995793    997931    FBtr0111045    0    +    28    683    2138
    chr4    1034029    1047719    FBtr0089223    0    -    5293    13394    13690
    ...


**Second file (BBB)**::

    chr4    90286    134453    FBtr0309803    0    +    657    29084    44167
    chr4    251355    266499    FBtr0089116    0    +    56    1296    15144
    chr4    252050    266506    FBtr0308086    0    +    56    1296    14456
    chr4    252050    266506    FBtr0308087    0    +    56    1296    14456
    chr4    252053    266528    FBtr0300796    0    +    56    1296    14475
    chr4    252053    266528    FBtr0300800    0    +    56    1296    14475
    chr4    252055    266528    FBtr0300798    0    +    56    1296    14473
    chr4    252055    266528    FBtr0300799    0    +    56    1296    14473
    chr4    252541    266528    FBtr0300797    0    +    56    1296    13987
    ...

**Third file (CCC)**::

    chr4    972167    979017    FBtr0310651    0    -    9927    6738    6850
    chr4    972186    979017    FBtr0089229    0    -    9927    6738    6831
    chr4    972186    979017    FBtr0089231    0    -    9927    6738    6831
    chr4    972186    979017    FBtr0089233    0    -    9927    6738    6831
    chr4    995793    996435    FBtr0111046    0    +    5    304    642
    chr4    995793    997931    FBtr0111044    0    +    17    714    2138
    chr4    995793    997931    FBtr0111045    0    +    17    714    2138
    chr4    1034029    1047719    FBtr0089223    0    -    17646    13536    13690
    ...


**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::

    key           AAA__V7   AAA__V8   AAA__V9   BBB__V7    BBB__V8    BBB__V9    CCC__V7   CCC__V8   CCC__V9
    FBtr0089116         0         0         0        56       1296      15144          0         0         0
    FBtr0089223      5293     13394     13690         0          0          0      17646     13536     13690
    FBtr0089229      3944      6428      6831         0          0          0       9927      6738      6831
    FBtr0089231      3944      6428      6831         0          0          0       9927      6738      6831
    FBtr0089233      3944      6428      6831         0          0          0       9927      6738      6831
    FBtr0111044        28       683      2138         0          0          0         17       714      2138
    FBtr0111045        28       683      2138         0          0          0         17       714      2138
    FBtr0111046         7       166       642         0          0          0          5       304       642
    FBtr0300796         0         0         0        56       1296      14475          0         0         0
    ...


**Note:** Input files need not be sorted.

-----

*multijoin* was written by A. Gordon (gordon at cshl dot edu)

</help>
</tool>