diff multijoin.xml @ 0:631dfde45073 draft default tip

First tool-shed public version
author gordon
date Tue, 09 Oct 2012 18:48:06 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multijoin.xml	Tue Oct 09 18:48:06 2012 -0400
@@ -0,0 +1,125 @@
+<tool id="cshl_multijoin" name="Multi-Join" version="0.1.1">
+  <description>(combine multiple files)</description>
+  <command interpreter="perl">multijoin
+		--key '$key_column'
+		--values '$value_columns'
+		--filler '$filler'
+		$ignore_dups
+		$output_header
+		$input_header
+#for $file in $files
+		'$file.filename'
+#end for
+		&gt; '$output'
+  </command>
+
+  <inputs>
+	<repeat name="files" title="file to join">
+		<param name="filename" label="Add file" type="data" format="txt" />
+	</repeat>
+
+	<param name="key_column" label="Common key column" type="integer"
+	   value="1" help="Usually gene-ID or other common value" />
+
+	<param name="value_columns" label="Column with values to join" type="text"
+	   value="2,3,4" help="Enter comma-separated list of columns, e.g. 3,6,8">
+	    <sanitizer>
+		    <valid initial="string.printable">
+			    <remove value="&apos;"/>
+		    </valid>
+	    </sanitizer>
+        </param>
+
+	<param name="output_header" type="boolean" checked="false" truevalue="--out-header" falsevalue="" label="Add header line to the output file" help="" />
+
+	<param name="input_header" type="boolean" checked="false" truevalue="--in-header" falsevalue="" label="Input files contain a header line (as first line)" help="" />
+
+	<param name="ignore_dups" type="boolean" checked="false" truevalue="--ignore-dups" falsevalue="" label="Ignore duplicated keys" help="If not set, duplicated keys in the same file will cause an error." />
+
+	<param name="filler" type="text" size="20" value="0" label="Value to put in unpaired (empty) fields">
+	    <sanitizer>
+		    <valid initial="string.printable">
+			    <remove value="&apos;"/>
+		    </valid>
+	    </sanitizer>
+        </param>
+
+  </inputs>
+  <outputs>
+    <data name="output" format="input" metadata_source="input1" />
+  </outputs>
+
+<help>
+**What it does**
+
+This tool joins multiple tabular files based on a common key column.
+
+-----
+
+**Example**
+
+To join three files, based on the 4th column, and keeping the 7th,8th,9th columns:
+
+**First file (AAA)**::
+
+    chr4	888449	890171	FBtr0308778	0	+	266	1527	1722
+    chr4	972167	979017	FBtr0310651	0	-	3944	6428	6850
+    chr4	972186	979017	FBtr0089229	0	-	3944	6428	6831
+    chr4	972186	979017	FBtr0089231	0	-	3944	6428	6831
+    chr4	972186	979017	FBtr0089233	0	-	3944	6428	6831
+    chr4	995793	996435	FBtr0111046	0	+	7	166	642
+    chr4	995793	997931	FBtr0111044	0	+	28	683	2138
+    chr4	995793	997931	FBtr0111045	0	+	28	683	2138
+    chr4	1034029	1047719	FBtr0089223	0	-	5293	13394	13690
+    ...
+
+
+**Second File (BBB)**::
+
+    chr4	90286	134453	FBtr0309803	0	+	657	29084	44167
+    chr4	251355	266499	FBtr0089116	0	+	56	1296	15144
+    chr4	252050	266506	FBtr0308086	0	+	56	1296	14456
+    chr4	252050	266506	FBtr0308087	0	+	56	1296	14456
+    chr4	252053	266528	FBtr0300796	0	+	56	1296	14475
+    chr4	252053	266528	FBtr0300800	0	+	56	1296	14475
+    chr4	252055	266528	FBtr0300798	0	+	56	1296	14473
+    chr4	252055	266528	FBtr0300799	0	+	56	1296	14473
+    chr4	252541	266528	FBtr0300797	0	+	56	1296	13987
+    ...
+
+**Third file (CCC)**::
+
+    chr4	972167	979017	FBtr0310651	0	-	9927	6738	6850
+    chr4	972186	979017	FBtr0089229	0	-	9927	6738	6831
+    chr4	972186	979017	FBtr0089231	0	-	9927	6738	6831
+    chr4	972186	979017	FBtr0089233	0	-	9927	6738	6831
+    chr4	995793	996435	FBtr0111046	0	+	5	304	642
+    chr4	995793	997931	FBtr0111044	0	+	17	714	2138
+    chr4	995793	997931	FBtr0111045	0	+	17	714	2138
+    chr4	1034029	1047719	FBtr0089223	0	-	17646	13536	13690
+    ...
+
+
+**Joining** the files, using **key column 4**, **value columns 7,8,9** and a **header line**, will return::
+
+    key           AAA__V7   AAA__V8   AAA__V9   BBB__V7    BBB__V8    BBB__V9    CCC__V7   CCC__V8   CCC__V9
+    FBtr0089116         0         0         0        56       1296      15144          0         0         0
+    FBtr0089223      5293     13394     13690         0          0          0      17646     13536     13690
+    FBtr0089229      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089231      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0089233      3944      6428      6831         0          0          0       9927      6738      6831
+    FBtr0111044        28       683      2138         0          0          0         17       714      2138
+    FBtr0111045        28       683      2138         0          0          0         17       714      2138
+    FBtr0111046         7       166       642         0          0          0          5       304       642
+    FBtr0300796         0         0         0        56       1296      14475          0         0         0
+    ...
+
+
+# Input files need not be sorted.
+
+-----
+
+*multijoin* was written by A. Gordon (gordon at cshl dot edu)
+
+</help>
+</tool>