Mercurial > repos > bgruening > join_files_on_column_fuzzy
view join_files_on_column_fuzzy.xml @ 3:6544babbdb8b draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/text_processing/join_files_on_column_fuzzy commit 3419a5a5e19a93369c8c20a39babe5636a309292
author | bgruening |
---|---|
date | Tue, 29 May 2018 07:49:14 -0400 |
parents | 5c31b0ea0734 |
children |
line wrap: on
line source
<tool id="join_files_on_column_fuzzy" name="Join two files" version="1.0.1">
    <!--
        Galaxy wrapper around join_files_on_column_fuzzy.py (shipped in the tool
        directory). It joins two tabular datasets on one column from each file,
        accepting values that differ by at most a user-defined distance
        (absolute or in ppm).
    -->
    <description>
        on column allowing a small difference
    </description>
    <requirements>
        <requirement type="package" version="3.6">python</requirement>
    </requirements>
    <command>
<![CDATA[
        python '$__tool_directory__/join_files_on_column_fuzzy.py'
        --f1 '$f1'
        --f2 '$f2'
        --c1 $c1
        --c2 $c2
        --outfile '$merged_file'
        ## Boolean params expand to their flag when checked, otherwise to nothing.
        $header
        $add_distance
        ## Best-match mode is signalled by an extra flag; the distance and unit
        ## are passed in both modes.
        #if $merge_mode_select == 'closest'
            --closest
        #end if
        --distance $distance
        --unit $units
]]>
    </command>
    <inputs>
        <!--
            NOTE(review): both data inputs are declared optional although the
            command line always passes them. Presumably they should be required;
            left unchanged here so existing callers are not affected.
        -->
        <param argument="--f1" type="data" optional="true" format="tabular" label="1st file"/>
        <param argument="--c1" type="data_column" data_ref="f1"
               label="Column to use from 1st file"
               help="The file needs to be sorted by this column, ascending."/>
        <param argument="--f2" type="data" optional="true" format="tabular" label="2nd file"/>
        <param argument="--c2" type="data_column" data_ref="f2"
               label="Column to use from 2nd file"
               help="The file needs to be sorted by this column, ascending."/>
        <param argument="--header" type="boolean" checked="false"
               truevalue="--header" falsevalue=""
               label="Do the input files contain a header line?"/>
        <param argument="--add_distance" type="boolean" checked="false"
               truevalue="--add_distance" falsevalue=""
               label="Add an additional column with the calculated distance."/>
        <param name="merge_mode_select" type="select" label="Choose the mode of merging.">
            <option value="closest" selected="true">Best match (in case of multiple best matches, only the first one is reported)</option>
            <option value="distance">All matches within the defined distance</option>
        </param>
        <!-- The default unit is defined by the selected option below. -->
        <param name="units" display="radio" type="select"
               label="Choose the metrics of your distance"
               help="ppm is useful for very small differences">
            <option value="absolute" selected="true">Absolute distance</option>
            <option value="ppm">Distance in ppm</option>
        </param>
        <param name="distance" value="0.2" type="float"
               label="Allowed distance between the two values that will trigger a merge"/>
    </inputs>
    <outputs>
        <data name="merged_file" format="tabular"/>
    </outputs>
    <tests>
        <!-- 1: all matches, absolute distance, no header -->
        <test>
            <param name="f1" value="file1.tab" ftype="tabular"/>
            <param name="f2" value="file2.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="0.1"/>
            <param name="units" value="absolute"/>
            <output name="merged_file" file="no_header_result1.tab"/>
        </test>
        <!-- 2: all matches, absolute distance, with header -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="0.2"/>
            <param name="units" value="absolute"/>
            <param name="header" value="true"/>
            <output name="merged_file" file="header_result2.tab"/>
        </test>
        <!-- 3: best match, default distance and unit, with header -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="true"/>
            <param name="merge_mode_select" value="closest"/>
            <output name="merged_file" file="header_closest_result3.tab"/>
        </test>
        <!-- 4: all matches, ppm distance, no header -->
        <test>
            <param name="f1" value="file1_ppm.tab" ftype="tabular"/>
            <param name="f2" value="file2_ppm.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="false"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="100"/>
            <param name="units" value="ppm"/>
            <output name="merged_file" file="no_header_ppm_result4.tab"/>
        </test>
        <!-- 5: best match with the distance column, with header.
             The mode is set through merge_mode_select; the tool has no
             input called "closest". -->
        <test>
            <param name="f1" value="file1_header.tab" ftype="tabular"/>
            <param name="f2" value="file2_header.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="true"/>
            <param name="merge_mode_select" value="closest"/>
            <param name="add_distance" value="true"/>
            <output name="merged_file" file="header_closest_result5.tab"/>
        </test>
        <!-- 6: all matches, ppm distance, with the distance column, no header -->
        <test>
            <param name="f1" value="file1_ppm.tab" ftype="tabular"/>
            <param name="f2" value="file2_ppm.tab" ftype="tabular"/>
            <param name="c1" value="1"/>
            <param name="c2" value="1"/>
            <param name="header" value="false"/>
            <param name="merge_mode_select" value="distance"/>
            <param name="distance" value="100"/>
            <param name="units" value="ppm"/>
            <param name="add_distance" value="true"/>
            <output name="merged_file" file="no_header_ppm_result6.tab"/>
        </test>
    </tests>
    <help>
<![CDATA[

Join two files on a common column. It is necessary to provide an allowed difference between both values as the maximum absolute difference or maximum parts per million (ppm) for matching.

Two modes are available:

1) In the **best match** mode: For each value in file 1 only the best matching value of file 2 is reported. In case of multiple best matches, only the first one is reported.

2) In the **all matches** mode: All matches within the defined distance are reported.

Be aware that file 1 is the template file and therefore the same value in file 2 can be matched to multiple values in file 1.

------

**Example**

**Input file 1**

::

    1
    2
    3
    4
    5

**Input file 2**

::

    1.1
    1.2
    2.2
    3.3
    4.4

**Joined file1 and 2** with best match and absolute distance 0.3::

    1    1.1    0.1
    2    2.2    0.2
    3    3.3    0.3

**Joined file1 and 2** with all matches and absolute distance 0.3::

    1    1.1    0.1
    1    1.2    0.2
    2    2.2    0.2
    3    3.3    0.3

]]>
    </help>
    <citations>
    </citations>
</tool>