0
|
1 <?xml version="1.0"?>
|
|
2 <tool id="repmatch_gff3" name="RepMatch" version="@WRAPPER_VERSION@.0">
|
|
3 <description>Match paired peaks from two or more replicates</description>
|
|
<!--
    Shared macro definitions for this wrapper. The tool expands macros named
    "requirements" and "citations" and uses the @WRAPPER_VERSION@ token;
    presumably all three are defined in the imported file (confirm there).
-->
<macros>
    <import>repmatch_gff3_macros.xml</import>
</macros>
|
|
7 <expand macro="requirements" />
|
|
<!--
    Cheetah template that assembles the repmatch_gff3.py command line.

    * Every selected input dataset is passed as an "input" option carrying
      the dataset path followed by its history id (hid).
    * The matched-peaks output is always requested; the unmatched-peaks,
      detail and statistics outputs are requested only for the matching
      "Restrict output to" selections, mirroring the output filters.
    * step / low_limit / up_limit are passed only when the advanced options
      are switched on.

    The script path is single-quoted so that a tool directory containing
    whitespace does not split into several shell words, and the template is
    wrapped in CDATA so that it never needs XML escaping.
-->
<command><![CDATA[
    python '$__tool_directory__/repmatch_gff3.py'
    #for $i in $input:
        --input "${i}" "${i.hid}"
    #end for
    --method $method
    --distance $distance
    --replicates $replicates
    --output_files $output_files_cond.output_files
    --output_matched_peaks "$output_matched_peaks"
    #if str($output_files_cond.output_files) in ["all", "matched_peaks_unmatched_peaks"]:
        --output_unmatched_peaks "$output_unmatched_peaks"
    #end if
    #if str($output_files_cond.output_files) == "all":
        --output_detail "$output_detail"
        --output_statistics_table "$output_statistics_table"
        --output_statistics_histogram "$output_statistics_histogram"
    #end if
    #if str($advanced_options_cond.advanced_options) == "on":
        --step $advanced_options_cond.step
        --low_limit $advanced_options_cond.low_limit
        --up_limit $advanced_options_cond.up_limit
    #end if
]]></command>
|
|
<!--
    Tool form. Boolean attributes are written uniformly as lowercase "true"
    (the original mixed "True" and "true").
-->
<inputs>
    <!-- Replicate datasets to match; at least two must be selected. -->
    <param name="input" type="data" format="gff" multiple="true" min="2" label="Match paired peaks on" />
    <param name="method" type="select" label="Method of finding match">
        <option value="closest" selected="true">Closest</option>
        <option value="largest">Largest</option>
        <option value="all">All</option>
    </param>
    <param name="distance" type="integer" value="50" min="0" label="Maximum distance between peaks in different replicates to allow merging" />
    <param name="replicates" type="integer" value="2" min="2" label="Minimum number of replicates that must be matched for merging to occur" />
    <!--
        Output restriction. The branches are intentionally empty: the selected
        value is only read by the command template and by the output filters.
    -->
    <conditional name="output_files_cond">
        <param name="output_files" type="select" label="Restrict output to" help="Statistics will always be generated." >
            <option value="all" selected="true">no restrictions (output everything)</option>
            <option value="matched_peaks">matched paired peaks only</option>
            <option value="matched_peaks_unmatched_peaks">matched paired peaks and unmatched paired peaks only</option>
        </param>
        <when value="matched_peaks" />
        <when value="matched_peaks_unmatched_peaks" />
        <when value="all" />
    </conditional>
    <!-- Advanced options are hidden by default; their values reach the command line only when set to "on". -->
    <conditional name="advanced_options_cond">
        <param name="advanced_options" type="select" label="Advanced options">
            <option value="off" selected="true">Hide advanced options</option>
            <option value="on">Display advanced options</option>
        </param>
        <when value="on">
            <param name="step" type="integer" value="0" min="0" label="Step size" help="Distance for each iteration" />
            <param name="low_limit" type="integer" value="-1000" label="Lower limit for Crick-Watson distance filter" />
            <param name="up_limit" type="integer" value="1000" label="Upper limit for Crick-Watson distance filter" />
        </when>
        <when value="off" />
    </conditional>
</inputs>
|
|
<!--
    Output datasets. Each filter mirrors the matching condition in the command
    template, so a dataset is only created when the script is asked to write it.
-->
<outputs>
    <!-- Statistics and detail outputs: only when output is unrestricted ("all"). -->
    <data name="output_statistics_table" format="tabular" label="Statistics Table: ${tool.name} on ${on_string}">
        <filter>output_files_cond["output_files"] == "all"</filter>
    </data>
    <data name="output_statistics_histogram" format="pdf" label="Statistics Histogram: ${tool.name} on ${on_string}">
        <filter>output_files_cond["output_files"] == "all"</filter>
    </data>
    <data name="output_detail" format="tabular" label="Data D: ${tool.name} on ${on_string}">
        <filter>output_files_cond["output_files"] == "all"</filter>
    </data>
    <!-- Unmatched peaks: for "all" and for "matched_peaks_unmatched_peaks". -->
    <data name="output_unmatched_peaks" format="tabular" label="Data UP: ${tool.name} on ${on_string}">
        <filter>output_files_cond["output_files"] in ["all", "matched_peaks_unmatched_peaks"]</filter>
    </data>
    <!-- Matched peaks: always produced, hence no filter. -->
    <data name="output_matched_peaks" format="gff" label="Data MP: ${tool.name} on ${on_string}" />
</outputs>
|
|
<!--
    Functional test: runs the "closest" method on two replicate GFF files with
    unrestricted output and compares all five outputs with the stored files.

    Parameters and outputs must be grouped inside a <test> element; the
    original placed them directly under <tests>, which defines no test case.
    The two datasets for the multi-select "input" parameter are given as one
    comma-separated value instead of two <param> elements with the same name.
-->
<tests>
    <test>
        <param name="input" value="closest_matched_pairs_input1.gff,largest_matched_pairs_input1.gff" ftype="gff" />
        <param name="method" value="closest" />
        <param name="distance" value="50" />
        <param name="replicates" value="2" />
        <param name="output_files" value="all" />
        <!--
            NOTE(review): step, low_limit and up_limit belong to the "on" branch
            of advanced_options_cond, which this test leaves at its default
            ("off"). Confirm whether advanced_options should be set to "on" here.
        -->
        <param name="step" value="0" />
        <param name="low_limit" value="-1000" />
        <param name="up_limit" value="1000" />
        <output name="output_statistics_table" file="statistics_table_out1.tabular" ftype="tabular" />
        <!-- The PDF is compared by approximate size (sim_size) rather than by content. -->
        <output name="output_statistics_histogram" file="statistics_histogram_out1.pdf" ftype="pdf" compare="sim_size" />
        <output name="output_detail" file="detail_out1.tabular" ftype="tabular" />
        <output name="output_unmatched_peaks" file="unmatched_peaks_out1.tabular" ftype="tabular" />
        <output name="output_matched_peaks" file="matched_peaks_out1.gff" ftype="gff" />
    </test>
</tests>
|
|
95 <help>
|
|
96 **What it does**
|
|
97
|
12
|
98 RepMatch accepts two or more input datasets, and starts by defining peak-pair midpoints in the first dataset. It then
|
|
99 discovers all peak-pair midpoints in the second dataset that are within the distance, defined by the tool's **Maximum
|
|
100 distance between peaks in different replicates to allow merging** parameter, from the peak-pair midpoint coordinate in
|
|
101 the first dataset. When encountering multiple candidates to match (one-to-many), RepMatch uses the method defined by
|
|
102 the tool's **Method of finding match** parameter so that there is at most only a one-to-one match across the two datasets.
|
|
103 This method provides the following options:
|
|
104
|
|
105 * **closest** - matches only the closest one in bp distance.
|
|
106 * **largest** - matches the one that contains the largest number of reads.
|
|
107 * **all** - both methods are run separately.
|
|
108
|
|
109 RepMatch matching is an iterative process, as it attempts to find the centroid coordinate amongst all replicates. As such,
|
|
110 the centroid is the point of reference for "distance" and "closest". This process can be sped up by increasing the tool's
|
|
111 **Step size** parameter.
|
|
112
|
|
113 The minimum number of replicates that must be matched for merging to occur is defined by the tool's **Minimum number of
|
|
114 replicates that must be matched for merging to occur** parameter. Additional filters can be applied using the tool's
|
|
115 **Advanced options**, including a lower and upper limit for the C-W distance.
|
|
116
|
|
117 .. image:: $PATH_TO_IMAGES/repmatch.png
|
0
|
118
|
10
|
119 -----
|
0
|
120
|
|
121 **Options**
|
|
122
|
12
|
123 * **Distance** - Maximum distance for discovering all peak-pair midpoints in a second dataset relative to the peak-pair midpoints in the first dataset
|
|
124 * **Method** - Method to use when encountering multiple candidates to match so that there is at most only a one-to-one match across the two datasets.
|
|
125 * **Step Size** - Distance for each iteration.
|
|
126 * **Replicates** - Minimum number of replicates that must be matched for merging to occur. This value must be at least 2.
|
|
127 * **Lower Limit** - Lower limit for the Crick-Watson distance filter.
|
|
128 * **Upper Limit** - Upper limit for the Crick-Watson distance filter.
|
10
|
129
|
13
|
130 -----
|
|
131
|
|
132 **Output Data Files**
|
|
133
|
|
134 * **Data MP** - gff file consisting of only peak pairs
|
|
135
|
|
136 - Columns are **chr**, **script**, **blank**, **peak start**, **peak end**, **blank**, **normalized tag counts**, **blank** and **info**.
|
|
137 - Peak start and end are separated by one coordinate.
|
|
138 - Normalized tag is the occupancy averaged across replicates.
|
|
139 - Attributes include C-W distance, sum total of tag counts, number of replicates merged.
|
|
140
|
|
141 * **Data D** - tabular file consisting of the list of all matched replicates.
|
|
142 * **Data UP** - tabular file consisting of all unmatched peak-pairs.
|
|
143
|
|
144 **Output Statistics Files**
|
|
145
|
|
146 * **Statistics Table** - tabular file providing the description key of **Data D**.
|
|
147 * **Statistics Histogram** - graph of the number of matched locations having the indicated replicate counts.
|
|
148
|
|
149 **Comments on Replicates**
|
|
150
|
|
151 Three types of replicates may be considered. Biological replicates represent independently collected biological samples.
|
|
152 At least two biological replicates must be performed for each experiment from which a conclusion is being drawn, and the
|
|
153 conclusion must be evident in both biological replicates when analyzed separately. Technical replicates represent a re-run
|
|
154 of the assay on the same biological material. This is usually done when one replicate fails to produce quality data, and is
|
|
155 used to replace that earlier replicate. Sequencing replicates represent additional sequencing of the same successful library
|
|
156 in order to obtain more reads should the analysis require it. The reads from individual sequencing replicates are usually
|
|
157 merged without need for separate analysis.
|
|
158
|
0
|
159 </help>
|
|
160 <expand macro="citations" />
|
|
161 </tool>
|