comparison diffbind.xml @ 30:6b235ac52faf draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
author iuc
date Mon, 08 Jul 2024 18:31:37 +0000
parents 3aa2c26cc990
children
comparison
equal deleted inserted replaced
29:3aa2c26cc990 30:6b235ac52faf
1 <tool id="diffbind" name="DiffBind" version="2.10.0+galaxy0"> 1 <tool id="diffbind" name="DiffBind" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description> differential binding analysis of ChIP-Seq peak data</description> 2 <description> differential binding analysis of ChIP-Seq peak data</description>
3 <macros>
4 <token name="@TOOL_VERSION@">3.12.0</token>
5 <token name="@VERSION_SUFFIX@">0</token>
6 <token name="@PROFILE@">22.05</token>
7 </macros>
3 <xrefs> 8 <xrefs>
4 <xref type="bio.tools">diffbind</xref> 9 <xref type="bio.tools">diffbind</xref>
10 <xref type="bioconductor">diffbind</xref>
5 </xrefs> 11 </xrefs>
6 <requirements> 12 <requirements>
7 <requirement type="package" version="2.10.0">bioconductor-diffbind</requirement> 13 <requirement type="package" version="@TOOL_VERSION@">bioconductor-diffbind</requirement>
8 <requirement type="package" version="3.5.1">r-base</requirement> 14 <requirement type="package" version="1.20.4">r-getopt</requirement>
9 <requirement type="package" version="1.20.3">r-getopt</requirement> 15 <requirement type="package" version="4.0.16">bioconductor-edger</requirement>
10 <requirement type="package" version="0.2.20">r-rjson</requirement>
11 </requirements> 16 </requirements>
12 <stdio> 17 <stdio>
13 <regex match="Execution halted" 18 <regex match="Execution halted"
14 source="both" 19 source="both"
15 level="fatal" 20 level="fatal"
34 #set $temp_factor_names = list() 39 #set $temp_factor_names = list()
35 #set $temp_factor = list() 40 #set $temp_factor = list()
36 41
37 #for $g in $rep_group: 42 #for $g in $rep_group:
38 43
39 #set $peak_files = list() 44 #set $peak_files = dict()
40 #set $bam_files = list() 45 #set $bam_files = dict()
41 #set $bam_controls = list() 46 #set $bam_controls = dict()
42 47
43 #for $file in $g.peaks: 48 #for $file in $g.peaks:
44 #set $file_name = str($g.groupName) + "-" + re.sub('[^\w\-]', '_', str($file.element_identifier)) + "-peaks.bed" 49 #set $file_name = str($g.groupName) + "-" + re.sub('[^\w\-]', '_', str($file.element_identifier)) + "-peaks.bed"
45 ln -s '${file}' '${file_name}' && 50 ln -s '${file}' '${file_name}' &&
46 $peak_files.append($file_name) 51 #set $peak_files[str($file.element_identifier)] = str($file_name)
47 #end for 52 #end for
48 53
49 #for $bam in $g.bamreads: 54 #for $bam in $g.bamreads:
50 #set $bam_name = re.sub('[^\w\-]', '_', str($bam.element_identifier)) 55 #set $bam_name = re.sub('[^\w\-]', '_', str($bam.element_identifier))
51 #set $bam_file = $bam_name + "-bamreads.bam" 56 #set $bam_file = $bam_name + "-bamreads.bam"
52 #set $bam_index = $bam_name + "-bamreads.bai" 57 #set $bam_index = $bam_name + "-bamreads.bai"
53 ln -s '${bam}' '${bam_file}' && 58 ln -s '${bam}' '${bam_file}' &&
54 ln -s '${bam.metadata.bam_index}' '${bam_index}' && 59 ln -s '${bam.metadata.bam_index}' '${bam_index}' &&
55 $bam_files.append($bam_file) 60 #set $bam_files[str($bam.element_identifier)] = str($bam_file)
56 #end for 61 #end for
57 62
58 $temp_factor.append( {str($g.groupName): $peak_files} ) 63 #if len($peak_files.keys()) != len($bam_files.keys())
59 $temp_factor.append( {str($g.groupName): $bam_files} ) 64 >&2 echo "Group $g.groupName: same number of Peak and Bam files needs to be given" && exit 1 &&
65 #end if
66 $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($peak_files.items())]} )
67 $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_files.items())]} )
60 68
61 #if str( $g.bamcontrol ) != 'None': 69 #if str( $g.bamcontrol ) != 'None':
62 #for $ctrl in $g.bamcontrol: 70 #for $ctrl in $g.bamcontrol:
63 #set $ctrl_name = re.sub('[^\w\-]', '_', str($ctrl.element_identifier)) 71 #set $ctrl_name = re.sub('[^\w\-]', '_', str($ctrl.element_identifier))
64 #set $ctrl_file = $ctrl_name + "-bamcontrol.bam" 72 #set $ctrl_file = $ctrl_name + "-bamcontrol.bam"
65 #set ctrl_index = $ctrl_name + "-bamcontrol.bai" 73 #set $ctrl_index = $ctrl_name + "-bamcontrol.bai"
66 #if $ctrl_file not in json.dumps($temp_factor): 74 #if $ctrl_file not in json.dumps($temp_factor):
67 ln -s '${ctrl}' '${ctrl_file}' && 75 ln -s '${ctrl}' '${ctrl_file}' &&
68 ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' && 76 ln -s '${ctrl.metadata.bam_index}' '${ctrl_index}' &&
69 #end if 77 #end if
70 $bam_controls.append($ctrl_file) 78 #set $bam_controls[str($ctrl.element_identifier)] = str($ctrl_file)
71 #end for 79 #end for
72 $temp_factor.append( {str($g.groupName): $bam_controls} ) 80 #if len($peak_files.keys()) != len($bam_controls.keys())
81 >&2 echo "Group $g.groupName: same number of Peak and Bam control files needs to be given" && exit 1 &&
82 #end if
83 $temp_factor.append( {str($g.groupName): [f[1] for f in sorted($bam_controls.items())]} )
73 #end if 84 #end if
74 85
75 #end for 86 #end for
76 87
77 $temp_factor.reverse() 88 $temp_factor.reverse()
80 91
81 Rscript '$__tool_directory__/diffbind.R' 92 Rscript '$__tool_directory__/diffbind.R'
82 93
83 -i '#echo json.dumps(temp_factor_names)#' 94 -i '#echo json.dumps(temp_factor_names)#'
84 -o '$outfile' 95 -o '$outfile'
96 -m '$method'
85 -t $th 97 -t $th
86 -f $out.format 98 -f $out.format
87 -p '$plots' 99 -p '$plots'
88 100 -O $minoverlap
89 #if $scorecol: 101 #if $use_blacklist:
90 -n "$scorecol" 102 -B
91 #end if 103 #end if
104 -n $scorecol
92 #if $lowerbetter: 105 #if $lowerbetter:
93 -l "$lowerbetter" 106 -l "$lowerbetter"
94 #end if 107 #end if
95 #if $summits: 108 #if $summits:
96 -s "$summits" 109 -s "$summits"
124 </param> 137 </param>
125 <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/> 138 <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/>
126 <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM files" help="Specify the Read BAM files used in the Peak calling. The input order of the BAM files for the samples MUST match the input order of the peaks files."/> 139 <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM files" help="Specify the Read BAM files used in the Peak calling. Peak and BAM files are each sorted by element identifier before being paired, so the identifiers of the BAM files MUST sort in the same sample order as the identifiers of the peaks files."/>
127 <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM files" help="If specifying a control BAM file, all samples are required to specify one, see Help section below. The input order of the BAM files for the samples MUST match the input order of the peaks files."/> 140 <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM files" help="If specifying a control BAM file, all samples are required to specify one, see Help section below. Control BAM files are sorted by element identifier before being paired, so their identifiers MUST sort in the same sample order as the identifiers of the peaks files."/>
128 </repeat> 141 </repeat>
129 142 <param name="method" type="select" label="Underlying method by which to analyze differential binding affinity">
130 <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"> 143 <option value="DBA_DESEQ2" selected="True">DESeq2</option>
144 <option value="DBA_EDGER">edgeR</option>
145 </param>
146 <param name="use_blacklist" type="boolean" truevalue="True" falsevalue="" checked="False" label="Filters peak intervals that overlap a blacklist from ENCODE" help="Works with human, mouse, worm and fly. Assembly version is determined from the BAM files." />
147 <param name="minoverlap" type="integer" min="1" value="2" label="Only include peaks in at least this many peaksets in the main binding matrix">
148 <sanitizer>
149 <valid initial="string.digits"/>
150 </sanitizer>
151 </param>
152 <param name="scorecol" type="integer" min="0" value="5" label="Score Column" help="Column in peak files that contains peak scores. Default: 5 (narrowPeak)">
131 <sanitizer> 153 <sanitizer>
132 <valid initial="string.digits"/> 154 <valid initial="string.digits"/>
133 </sanitizer> 155 </sanitizer>
134 </param> 156 </param>
135 <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" /> 157 <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" />
212 <assert_contents> 234 <assert_contents>
213 <has_text text="SessionInfo"/> 235 <has_text text="SessionInfo"/>
214 </assert_contents> 236 </assert_contents>
215 </output> 237 </output>
216 </test> 238 </test>
239 <!-- Ensure EDGER works -->
240 <test expect_num_outputs="3">
241 <repeat name="rep_group">
242 <param name="groupName" value="Resistant"/>
243 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
244 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
245 </repeat>
246 <repeat name="rep_group">
247 <param name="groupName" value="Responsive"/>
248 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
249 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
250 </repeat>
251 <param name="scorecol" value="5" />
252 <param name="method" value="DBA_EDGER" />
253 <param name="format" value="interval"/>
254 <param name="pdf" value="True" />
255 <param name="binding_matrix" value="True" />
256 <param name="rscript" value="False"/>
257 <output name="outfile" ftype="interval" value="out_diffbind_edger.interval" />
258 <output name="plots" value="out_plots_edger.pdf" compare="sim_size" />
259 <output name="binding_matrix" value="out_binding_matrix_edger.tab" />
260 </test>
217 <!-- Ensure control BAMs input works --> 261 <!-- Ensure control BAMs input works -->
218 <test expect_num_outputs="1"> 262 <test expect_num_outputs="1">
219 <repeat name="rep_group"> 263 <repeat name="rep_group">
220 <param name="groupName" value="Resistant"/> 264 <param name="groupName" value="Resistant"/>
221 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> 265 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
261 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> 305 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
262 </repeat> 306 </repeat>
263 <param name="scorecol" value="5" /> 307 <param name="scorecol" value="5" />
264 <param name="format" value="tabular"/> 308 <param name="format" value="tabular"/>
265 <output name="outfile" ftype="tabular" file="out_diffbind.tab" /> 309 <output name="outfile" ftype="tabular" file="out_diffbind.tab" />
310 </test>
311 <!-- Ensure minoverlap works -->
312 <test expect_num_outputs="1">
313 <repeat name="rep_group">
314 <param name="groupName" value="Resistant"/>
315 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
316 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
317 </repeat>
318 <repeat name="rep_group">
319 <param name="groupName" value="Responsive"/>
320 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
321 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
322 </repeat>
323 <param name="minoverlap" value="1" />
324 <param name="scorecol" value="5" />
325 <param name="format" value="tabular"/>
326 <output name="outfile" ftype="tabular" file="out_diffbind_minoverlap1.tab" />
327 </test>
328 <!-- Ensure blacklist filtering works -->
329 <test expect_num_outputs="1">
330 <repeat name="rep_group">
331 <param name="groupName" value="Resistant"/>
332 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/>
333 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" />
334 </repeat>
335 <repeat name="rep_group">
336 <param name="groupName" value="Responsive"/>
337 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/>
338 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" />
339 </repeat>
340 <param name="use_blacklist" value="True"/>
341 <param name="scorecol" value="5" />
342 <param name="format" value="tabular"/>
343 <output name="outfile" ftype="tabular" file="out_diffbind_blacklist.tab" />
266 </test> 344 </test>
267 </tests> 345 </tests>
268 <help><![CDATA[ 346 <help><![CDATA[
269 347
270 .. class:: infomark 348 .. class:: infomark
305 position, and usually a score of some type indicating confidence in, or strength of, the peak. 383 position, and usually a score of some type indicating confidence in, or strength of, the peak.
306 Associated with each peakset are metadata relating to the experiment from which the peakset 384 Associated with each peakset are metadata relating to the experiment from which the peakset
307 was derived. Additionally, files containing mapped sequencing reads (BAM files) need to 385 was derived. Additionally, files containing mapped sequencing reads (BAM files) need to
308 be associated with each peakset (one for the ChIP data, and optionally another representing 386 be associated with each peakset (one for the ChIP data, and optionally another representing
309 a control sample) 387 a control sample)
388
389 Inputs for a group will be sorted by identifier before processing. For each group the corresponding
390 sets of peak and BAM files need to be provided. Ideally this is accomplished by providing the data in
391 collections.
392
310 393
311 **Groups** 394 **Groups**
312 395
313 You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g. Resistant and Responsive) in the tool form above. 396 You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g. Resistant and Responsive) in the tool form above.
314 397
401 Example - **BED format**: 484 Example - **BED format**:
402 485
403 ===== ====== ====== ======== ===== ====== 486 ===== ====== ====== ======== ===== ======
404 Chrom Start End Name Score Strand 487 Chrom Start End Name Score Strand
405 ===== ====== ====== ======== ===== ====== 488 ===== ====== ====== ======== ===== ======
406 chr18 394599 396513 DiffBind 0 \. 489 chr18 394599 396513 DiffBind 0 \.
407 chr18 111566 112005 DiffBind 0 \. 490 chr18 111566 112005 DiffBind 0 \.
408 chr18 346463 347342 DiffBind 0 \. 491 chr18 346463 347342 DiffBind 0 \.
409 chr18 399013 400382 DiffBind 0 \. 492 chr18 399013 400382 DiffBind 0 \.
410 chr18 371109 372102 DiffBind 0 \. 493 chr18 371109 372102 DiffBind 0 \.
411 ===== ====== ====== ======== ===== ====== 494 ===== ====== ====== ======== ===== ======
412 495
413 Example - **Tabular format**: 496 Example - **Tabular format**:
414 497
415 ===== ====== ====== ======== ===== ====== ==== =============== ============== ===== ======== ======== 498 ===== ====== ====== ======== ===== ====== ==== =============== ============== ===== ======== ========