<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper around the beta "join" command of cgatools.

  The tool takes two tabular datasets (A and B), zero or more
  "columnA:columnB" match specifications, a column selection, an output mode
  and a switch controlling whether unmatched records of A are printed. It
  writes one tabular dataset.
-->
<tool id="cga_join" name="join(beta)" version="0.0.1">

  <description>two tsv files based on equal fields or overlapping regions.</description> <!--adds description in toolbar-->

  <!-- The wrapper only shells out to the cgatools executable. -->
  <requirements>
    <requirement type="binary">cgatools</requirement>
  </requirements>

  <!--
    Run the executable.

    Every substituted value is single-quoted so the shell passes it through
    as one literal argument; in particular the default column selection
    "A.*,B.*" must not be glob-expanded. $dump is deliberately left unquoted:
    one of its options is the empty string, and quoting it would hand
    cgatools an empty positional argument.

    The #for loop emits one match option per entry of the "matched" repeat.
  -->
  <command><![CDATA[
    cgatools join --beta
      --input '$input1'
      --input '$input2'
      --output '$output'
      --output-mode '$outmode'
      $dump
      --select '$col'
      #for $m in $matched
        --match '${m.match}'
      #end for
  ]]></command>

  <outputs>
    <data format="tabular" name="output" />
  </outputs>

  <inputs>
    <!--
      Form field to select input file A.
      The validators check the dataset's dbkey: it has to be specified and
      has to appear in column 0 of cg_crr_files.loc.
    -->
    <param name="input1" type="data" format="tabular" label="Select first input file (A)">
      <validator type="unspecified_build" />
      <validator type="dataset_metadata_in_file"
                 filename="cg_crr_files.loc"
                 metadata_name="dbkey"
                 metadata_column="0"
                 message="cgatools is not currently available for this build."/>
    </param>

    <!-- Form field to select input file B (same build validation as file A). -->
    <param name="input2" type="data" format="tabular" label="Select second input file (B)">
      <validator type="unspecified_build" />
      <validator type="dataset_metadata_in_file"
                 filename="cg_crr_files.loc"
                 metadata_name="dbkey"
                 metadata_column="0"
                 message="cgatools is not currently available for this build."/>
    </param>

    <!-- Form field to specify columns to match; each entry becomes one match option. -->
    <repeat name="matched" title="Matched column">
      <param name="match" type="text" label="Enter column A:column B"/>
    </repeat>

    <!-- Form field to specify columns to print (passed to the select option). -->
    <param name="col"
           type="text"
           value="A.*,B.*"
           label="Specify columns to print from file A and B in format A.col_name1,A.col_name2,B.col_name1" />

    <!-- Form field to select the output mode. -->
    <param name="outmode" type="select" label="Select output mode">
      <option value="full" selected="true">full (1 line for each match of records in A and B)</option>
      <option value="compact">compact (1 line for each record in A, joining multiple records in B by semicolon)</option>
      <option value="compact-pct">compact-pct (same as compact, annotated with % overlap)</option>
    </param>

    <!--
      Form field to select which records of A are printed.
      The option value is inserted into the command line verbatim, so the
      second option (empty value) simply adds no flag.
    -->
    <param name="dump" type="select" label="Select records to print">
      <option value="--always-dump" selected="true">print all records of A even if not matched in B</option>
      <option value="">print only records of A that are matched in B</option>
    </param>
  </inputs>

  <!-- CDATA keeps the literal "<" and "<=" of the manual text well-formed. -->
  <help><![CDATA[

**What it does**

This tool joins two tab-delimited files based on equal fields or overlapping regions.

cgatools: http://sourceforge.net/projects/cgatools/files/

-----

**cgatools Manual**::

    COMMAND NAME
        join - Joins two tab-delimited files based on equal fields or overlapping regions.

    DESCRIPTION
        Joins two tab-delimited files based on equal fields or overlapping regions.
        By default, an output record is produced for each match found between file
        A and file B, but output format can be controlled by the --output-mode
        parameter.

    OPTIONS
        -h [ --help ]
            Print this help message.

        --beta
            This is a beta command. To run this command, you must pass the --beta
            flag.

        --input arg
            File name to use as input (may be passed in as arguments at the end of
            the command), or omitted for stdin). There must be exactly two input
            files to join. If only one file is specified by name, file A is taken
            to be stdin and file B is the named file. File B is read fully into
            memory, and file A is streamed. File A's columns appear first in the
            output.

        --output arg (=STDOUT)
            The output file name (may be omitted for stdout).

        --match arg
            A match specification, which is a column from A and a column from B
            separated by a colon.

        --overlap arg

        -m [ --output-mode ] arg (=full)
            Output mode, one of the following:
                full        Print an output record for each match found between
                            file A and file B.
                compact     Print at most one record for each record of file A,
                            joining the file B values by a semicolon and
                            suppressing repeated B values and empty B values.
                compact-pct Same as compact, but for each distinct B value,
                            annotate with the percentage of the A record that is
                            overlapped by B records with that B value. Percentage
                            is rounded up to nearest integer.

        --overlap-mode arg (=strict)
            Overlap mode, one of the following:
                strict                Range A and B overlap if A.begin < B.end and
                                      B.begin < A.end.
                allow-abutting-points Range A and B overlap they meet the strict
                                      requirements, or if A.begin <= B.end and
                                      B.begin <= A.end and either A or B has zero
                                      length.

        --select arg (=A.*,B.*)
            Set of fields to select for output.

        -a [ --always-dump ]
            Dump every record of A, even if there are no matches with file B.

        --overlap-fraction-A arg (=0)
            Minimum fraction of A region overlap for filtering output.

        --boundary-uncertainty-A arg (=0)
            Boundary uncertainty for overlap filtering. Specifically, records
            failing the following predicate are filtered away: overlap >=
            overlap-fraction-A * ( A-range-length - boundary-uncertainty-A )

        --overlap-fraction-B arg (=0)
            Minimum fraction of B region overlap for filtering output.

        --boundary-uncertainty-B arg (=0)
            Boundary uncertainty for overlap filtering. Specifically, records
            failing the following predicate are filtered away: overlap >=
            overlap-fraction-B * ( B-range-length - boundary-uncertainty-B )

    SUPPORTED FORMAT_VERSION
        Any
  ]]></help>
</tool>