annotate cgatools_suite/tools/cgatools/join.xml @ 9:326dbb82d58b draft

Uploaded
author bcrain-completegenomics
date Wed, 06 Jun 2012 17:04:11 -0400
parents 96829b1b73ea
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
1 <tool id="cga_join" name="join(beta)" version="0.0.1">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
2
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
3 <description>two tsv files based on equal fields or overlapping regions.</description> <!--adds description in toolbar-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
4
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
5 <requirements>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
6 <requirement type="binary">cgatools</requirement>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
7 </requirements>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
8
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
9 <command> <!--run executable-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
10 cgatools join --beta
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
11 --input $input1
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
12 --input $input2
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
13 --output $output
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
14 --output-mode $outmode
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
15 $dump
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
16 --select $col
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
17 #for $m in $matched <!--get all matched columns-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
18 --match ${m.match}
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
19 #end for
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
20 </command>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
21
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
22 <outputs>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
23 <data format="tabular" name="output" />
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
24 </outputs>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
25
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
26 <inputs>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
27 <!--form field to select input file A-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
28 <param name="input1" type="data" format="tabular" label="Select first input file (A)">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
29 <validator type="unspecified_build" />
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
30 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
31 metadata_name="dbkey" metadata_column="0"
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
32 message="cgatools is not currently available for this build."/>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
33 </param>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
34
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
35 <!--form field to select input file B-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
36 <param name="input2" type="data" format="tabular" label="Select second input file (B)">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
37 <validator type="unspecified_build" />
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
38 <validator type="dataset_metadata_in_file" filename="cg_crr_files.loc"
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
39 metadata_name="dbkey" metadata_column="0"
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
40 message="cgatools is not currently available for this build."/>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
41 </param>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
42
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
43 <!--form field to specify columns to match-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
44 <repeat name="matched" title="Matched column">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
45 <param name="match" type="text" label="Enter column A:column B"/>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
46 </repeat>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
47
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
48 <!--form field to specify columns to print-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
49 <param name="col" type="text" value="A.*,B.*" label="Specify columns to print from file A and B in format A.col_name1,A.col_name2,B.col_name1" />
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
50
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
51 <!--form field to select output-mode-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
52 <param name="outmode" type="select" label="Select output mode">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
53 <option value="full" selected="true">full (1 line for each match of records in A and B)</option>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
54 <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
55 <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
56 </param>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
57
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
58 <!--form field to select which records of A to print (all, or only those matched in B)-->
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
59 <param name="dump" type="select" label="Select records to print">
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
60 <option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
61 <option value="">print only records of A that are matched in B</option>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
62 </param>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
63 </inputs>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
64
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
65 <help>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
66
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
67 **What it does**
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
68
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
69 This tool joins two tab-delimited files based on equal fields or overlapping regions.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
70
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
71 cgatools: http://sourceforge.net/projects/cgatools/files/
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
72
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
73 -----
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
74
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
75 **cgatools Manual**::
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
76
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
77 COMMAND NAME
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
78 join - Joins two tab-delimited files based on equal fields or overlapping regions.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
79
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
80 DESCRIPTION
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
81 Joins two tab-delimited files based on equal fields or overlapping regions.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
82 By default, an output record is produced for each match found between file
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
83 A and file B, but output format can be controlled by the --output-mode
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
84 parameter.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
85
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
86 OPTIONS
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
87 -h [ --help ]
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
88 Print this help message.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
89
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
90 --beta
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
91 This is a beta command. To run this command, you must pass the --beta
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
92 flag.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
93
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
94 --input arg
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
95 File name to use as input (may be passed in as arguments at the end of
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
96 the command, or omitted for stdin). There must be exactly two input
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
97 files to join. If only one file is specified by name, file A is taken
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
98 to be stdin and file B is the named file. File B is read fully into
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
99 memory, and file A is streamed. File A's columns appear first in the
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
100 output.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
101
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
102 --output arg (=STDOUT)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
103 The output file name (may be omitted for stdout).
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
104
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
105 --match arg
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
106 A match specification, which is a column from A and a column from B
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
107 separated by a colon.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
108
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
109 --overlap arg
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
110
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
111 -m [ --output-mode ] arg (=full)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
112 Output mode, one of the following:
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
113 full Print an output record for each match found between
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
114 file A and file B.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
115 compact Print at most one record for each record of file A,
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
116 joining the file B values by a semicolon and
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
117 suppressing repeated B values and empty B values.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
118 compact-pct Same as compact, but for each distinct B value,
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
119 annotate with the percentage of the A record that is
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
120 overlapped by B records with that B value. Percentage
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
121 is rounded up to nearest integer.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
122
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
123 --overlap-mode arg (=strict)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
124 Overlap mode, one of the following:
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
125 strict Range A and B overlap if A.begin &lt; B.end and
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
126 B.begin &lt; A.end.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
127 allow-abutting-points Range A and B overlap if they meet the strict
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
128 requirements, or if A.begin &lt;= B.end and
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
129 B.begin &lt;= A.end and either A or B has zero
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
130 length.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
131
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
132 --select arg (=A.*,B.*)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
133 Set of fields to select for output.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
134
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
135 -a [ --always-dump ]
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
136 Dump every record of A, even if there are no matches with file B.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
137
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
138 --overlap-fraction-A arg (=0)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
139 Minimum fraction of A region overlap for filtering output.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
140
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
141 --boundary-uncertainty-A arg (=0)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
142 Boundary uncertainty for overlap filtering. Specifically, records
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
143 failing the following predicate are filtered away: overlap &gt;=
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
144 overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
145
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
146 --overlap-fraction-B arg (=0)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
147 Minimum fraction of B region overlap for filtering output.
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
148
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
149 --boundary-uncertainty-B arg (=0)
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
150 Boundary uncertainty for overlap filtering. Specifically, records
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
151 failing the following predicate are filtered away: overlap &gt;=
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
152 overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
153
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
154 SUPPORTED FORMAT_VERSION
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
155 Any
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
156 </help>
96829b1b73ea Uploaded
bcrain-completegenomics
parents:
diff changeset
157 </tool>