Mercurial > repos > bgruening > diffbind
comparison diffbind.xml @ 23:393393c58c35 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit cc4c1c4131518b9cbf986a1f252767ff73ca938e
author | iuc |
---|---|
date | Sat, 07 Apr 2018 15:45:03 -0400 |
parents | 51f0f4df83c2 |
children | 15bbd86c6c7b |
comparison
equal
deleted
inserted
replaced
22:51f0f4df83c2 | 23:393393c58c35 |
---|---|
1 <tool id="diffbind" name="DiffBind" version="2.6.5.0"> | 1 <tool id="diffbind" name="DiffBind" version="2.6.6.1"> |
2 <description> differential binding analysis of ChIP-Seq peak data</description> | 2 <description> differential binding analysis of ChIP-Seq peak data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="2.6.5">bioconductor-diffbind</requirement> | 4 <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> |
5 <requirement type="package" version="1.20.0">r-getopt</requirement> | 5 <requirement type="package" version="1.20.0">r-getopt</requirement> |
6 <!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"--> | 6 <requirement type="package" version="0.2.15">r-rjson</requirement> |
7 <requirement type="package" version="0.10.11">r-rmysql</requirement> | |
8 </requirements> | 7 </requirements> |
9 <stdio> | 8 <stdio> |
10 <regex match="Execution halted" | 9 <regex match="Execution halted" |
11 source="both" | 10 source="both" |
12 level="fatal" | 11 level="fatal" |
19 source="both" | 18 source="both" |
20 level="fatal" | 19 level="fatal" |
21 description="An undefined error occured, please check your intput carefully and contact your administrator." /> | 20 description="An undefined error occured, please check your intput carefully and contact your administrator." /> |
22 </stdio> | 21 </stdio> |
23 <version_command><![CDATA[ | 22 <version_command><![CDATA[ |
24 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | 23 echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ") |
25 ]]></version_command> | 24 ]]></version_command> |
26 <command><![CDATA[ | 25 <command><![CDATA[ |
27 ## seems that diffbind also needs file extensions to work properly | 26 #import re |
28 #set $counter = 1 | 27 #import json |
29 #for $sample in $samples: | 28 |
30 ln -s $sample.bamreads #echo str($counter) + "_bamreads.bam"# && | 29 ## Adapted from DESeq2 wrapper |
31 ln -s ${sample.bamreads.metadata.bam_index} #echo str($counter) + "_bamreads.bai"# && | 30 #set $temp_factor_names = list() |
32 #if str( $sample.bamcontrol ) != 'None': | 31 #set $temp_factor = list() |
33 ln -s $sample.bamcontrol #echo str($counter) + "_bamcontrol.bam"# && | 32 |
34 ln -s ${sample.bamcontrol.metadata.bam_index} #echo str($counter) + "_bamcontrol.bai"# && | 33 #for $g in $rep_group: |
35 #end if | 34 |
36 #set $counter = $counter + 1 | 35 #set $peak_files = list() |
36 #set $bam_files = list() | |
37 #set $bam_controls = list() | |
38 | |
39 #for $file in $g.peaks: | |
40 #set $file_name = re.sub('[^\w\-\s]', '_', str($file.element_identifier)) | |
41 ln -s '${file}' ${g.groupName}-${file_name}-peaks.bed && | |
42 $peak_files.append(str($g.groupName) + '-' + $file_name + '-peaks.bed') | |
43 #end for | |
44 | |
45 #for $bam in $g.bamreads: | |
46 #set $bam_name = re.sub('[^\w\-\s]', '_', str($bam.element_identifier)) | |
47 ln -s '${bam}' ${bam_name}-bamreads.bam && | |
48 ln -s ${bam.metadata.bam_index} ${bam_name}-bamreads.bai && | |
49 $bam_files.append($bam_name + '-bamreads.bam') | |
50 #end for | |
51 | |
52 $temp_factor.append( {str($g.groupName): $peak_files} ) | |
53 $temp_factor.append( {str($g.groupName): $bam_files} ) | |
54 | |
55 #if str( $g.bamcontrol ) != 'None': | |
56 #for $ctrl in $g.bamcontrol: | |
57 #set $ctrl_name = re.sub('[^\w\-\s]', '_', str($ctrl.element_identifier)) | |
58 ln -s '${ctrl}' ${g.groupName}-${ctrl_name}-bamcontrol.bam && | |
59 ln -s ${ctrl.metadata.bam_index} ${g.groupName}-${ctrl_name}-bamcontrol.bai && | |
60 $bam_controls.append(str($g.groupName) + '-' + $ctrl_name + '-bamcontrol.bam') | |
37 #end for | 61 #end for |
38 | 62 $temp_factor.append( {str($g.groupName): $bam_controls} ) |
39 Rscript '$__tool_directory__/diffbind.R' | 63 #end if |
40 -i $infile | 64 |
41 -o '$outfile' | 65 #end for |
42 -p '$plots' | 66 |
43 -f $format | 67 $temp_factor.reverse() |
44 -t $th | 68 $temp_factor_names.append([str($factorName), $temp_factor]) |
45 | 69 |
46 #if $binding_affinity_matrix: | 70 |
47 -b | 71 Rscript '$__tool_directory__/diffbind.R' |
48 #end if | 72 |
73 -i '#echo json.dumps(temp_factor_names)#' | |
74 -o '$outfile' | |
75 -t $th | |
76 -f $out.format | |
77 -p '$plots' | |
78 | |
79 #if $scorecol: | |
80 -n "$scorecol" | |
81 #end if | |
82 #if $lowerbetter: | |
83 -l "$lowerbetter" | |
84 #end if | |
85 #if $summits: | |
86 -s "$summits" | |
87 #end if | |
88 | |
89 #if $out.binding_matrix: | |
90 -b | |
91 #end if | |
92 | |
93 #if $out.rdata: | |
94 -r | |
95 #end if | |
96 | |
97 #if $out.analysis_info: | |
98 -a | |
99 #end if | |
100 | |
101 #if $out.rscript: | |
102 && cp '$__tool_directory__/diffbind.R' '$rscript' | |
103 #end if | |
49 ]]> | 104 ]]> |
50 </command> | 105 </command> |
51 <configfiles> | |
52 <configfile name="infile"><![CDATA[ | |
53 #set $counter = 1 | |
54 #for $sample in $samples: | |
55 #if str( $sample.bamcontrol ) != 'None' and $counter == 1: | |
56 SampleID,Tissue,Factor,Condition,Replicate,bamReads,bamControl,Peaks | |
57 #elif $counter == 1: | |
58 SampleID,Tissue,Factor,Condition,Replicate,bamReads,Peaks | |
59 #end if | |
60 #if str( $sample.bamcontrol ) != 'None': | |
61 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,#echo str($counter) + '_bamcontrol.bam'#,$sample.peaks | |
62 #else: | |
63 $sample.sample_id,$sample.tissue,$sample.factor,$sample.condition,$sample.replicate,#echo str($counter) + '_bamreads.bam'#,$sample.peaks | |
64 #end if | |
65 #set $counter = $counter + 1 | |
66 #end for]]></configfile> | |
67 </configfiles> | |
68 <inputs> | 106 <inputs> |
69 <repeat name="samples" title="Samples" min="2"> | 107 <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores."> |
70 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" /> | 108 <sanitizer> |
71 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" /> | 109 <valid initial="string.letters,string.digits"><add value="_" /></valid> |
72 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" /> | 110 </sanitizer> |
73 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" /> | 111 </param> |
74 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" /> | 112 <repeat name="rep_group" title="Group" min="2" default="2"> |
75 <param name="bamreads" type="data" format="bam" label="Read BAM file" help="Specify the Read BAM file, used for Peak calling."/> | 113 <param name="groupName" type="text" label="Name" |
76 <param name="bamcontrol" type="data" format="bam" optional="True" label="Control BAM file" help="If specifying a control BAM file for this sample, then all samples are required to specify one."/> | 114 help="Name of group that the peak files belong to (e.g. Resistant or Responsive). NOTE: Please only use letters, numbers or underscores (case sensitive)."> |
77 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/> | 115 <sanitizer> |
116 <valid initial="string.letters,string.digits"><add value="_" /></valid> | |
117 </sanitizer> | |
118 </param> | |
119 <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/> | |
120 <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/> | |
121 <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/> | |
78 </repeat> | 122 </repeat> |
79 <param name="th" type="float" value="1" min="0" max="1" | 123 |
80 label="FDR Threshold" | 124 <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"/> |
81 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/> | 125 <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" /> |
82 <param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true" | 126 <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/> |
83 label="Visualising the analysis results" | 127 <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/> |
84 help="output an additional PDF file" /> | 128 |
85 <param name="format" type="select" label="Output Format"> | 129 <!-- Output Options --> |
86 <option value="bed">BED</option> | 130 <section name="out" expanded="false" title="Output Options"> |
87 <option value="gff">GFF</option> | 131 <param name="format" type="select" label="Output Format"> |
88 <option value="wig">WIG</option> | 132 <option value="bed">BED</option> |
89 </param> | 133 <option value="gff">GFF</option> |
90 <param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | 134 <option value="wig">WIG</option> |
135 </param> | |
136 <param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" /> | |
137 <param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" /> | |
138 <param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No"/> | |
139 <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used will be provided as a text file in the output. Default: No"/> | |
140 <param name="analysis_info" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output analysis info?" help="If this option is set to Yes, information from the dba.count and dba.analyze commmands will be output in a text file. Default: No"/> | |
141 </section> | |
91 </inputs> | 142 </inputs> |
143 | |
92 <outputs> | 144 <outputs> |
93 <data name="outfile" format="bed" label="Differential binding sites on ${on_string}"> | 145 <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites"> |
94 <change_format> | 146 <change_format> |
95 <when input="format" value="wig" format="wig" /> | 147 <when input="format" value="wig" format="wig" /> |
96 <when input="format" value="gff" format="gff" /> | 148 <when input="format" value="gff" format="gff" /> |
97 </change_format> | 149 </change_format> |
98 </data> | 150 </data> |
99 <data name="plots" format="pdf" label="Differential binding sites on ${on_string}"> | 151 <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots"> |
100 <filter>pdf == True</filter> | 152 <filter>out['pdf']</filter> |
101 </data> | 153 </data> |
102 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}"> | 154 <data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix"> |
103 <filter>binding_affinity_matrix == True</filter> | 155 <filter>out['binding_matrix']</filter> |
156 </data> | |
157 <data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file"> | |
158 <filter>out['rdata']</filter> | |
159 </data> | |
160 <data name="rscript" format="txt" label="${tool.name} on ${on_string}: Rscript"> | |
161 <filter>out['rscript']</filter> | |
162 </data> | |
163 <data name="analysis_info" format="txt" from_work_dir="DiffBind_analysis_info.txt" label="${tool.name} on ${on_string}: Analysis info"> | |
164 <filter>out['analysis_info']</filter> | |
104 </data> | 165 </data> |
105 </outputs> | 166 </outputs> |
167 | |
106 <tests> | 168 <tests> |
107 <test> | 169 <test expect_num_outputs="6"> |
108 <repeat name="samples"> | 170 <param name="factorName" value="Condition"/> |
109 <param name="sample_id" value="BT4741" /> | 171 <repeat name="rep_group"> |
110 <param name="tissue" value="BT474" /> | 172 <param name="groupName" value="Resistant"/> |
111 <param name="factor" value="ER" /> | 173 <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> |
112 <param name="condition" value="Resistant" /> | 174 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam,BT474_ER_2.bam" /> |
113 <param name="replicate" value="1" /> | |
114 <param name="bamreads" ftype="bam" value="BT474_ER_1.bam" /> | |
115 <param name="peaks" ftype="bed" value="BT474_ER_1.bed.gz" /> | |
116 </repeat> | 175 </repeat> |
117 <repeat name="samples"> | 176 <repeat name="rep_group"> |
118 <param name="sample_id" value="BT4742" /> | 177 <param name="groupName" value="Responsive"/> |
119 <param name="tissue" value="BT474" /> | 178 <param name="peaks" value="MCF7_ER_1.bed.gz,MCF7_ER_2.bed.gz"/> |
120 <param name="factor" value="ER" /> | 179 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam,MCF7_ER_2.bam" /> |
121 <param name="condition" value="Resistant" /> | |
122 <param name="replicate" value="2" /> | |
123 <param name="bamreads" ftype="bam" value="BT474_ER_2.bam" /> | |
124 <param name="peaks" ftype="bed" value="BT474_ER_2.bed.gz" /> | |
125 </repeat> | 180 </repeat> |
126 <repeat name="samples"> | 181 <param name="scorecol" value="5" /> |
127 <param name="sample_id" value="MCF71" /> | |
128 <param name="tissue" value="MCF7" /> | |
129 <param name="factor" value="ER" /> | |
130 <param name="condition" value="Responsive" /> | |
131 <param name="replicate" value="1" /> | |
132 <param name="bamreads" ftype="bam" value="MCF7_ER_1.bam" /> | |
133 <param name="peaks" ftype="bed" value="MCF7_ER_1.bed.gz" /> | |
134 </repeat> | |
135 <repeat name="samples"> | |
136 <param name="sample_id" value="MCF72" /> | |
137 <param name="tissue" value="MCF7" /> | |
138 <param name="factor" value="ER" /> | |
139 <param name="condition" value="Responsive" /> | |
140 <param name="replicate" value="2" /> | |
141 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" /> | |
142 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" /> | |
143 </repeat> | |
144 <param name="pdf" value="True" /> | 182 <param name="pdf" value="True" /> |
145 <param name="binding_affinity_matrix" value="True" /> | 183 <param name="binding_matrix" value="True" /> |
184 <param name="rdata" value="True" /> | |
185 <param name="rscript" value="True"/> | |
186 <param name="analysis_info" value="True"/> | |
146 <output name="outfile" value="out_diffbind.bed" /> | 187 <output name="outfile" value="out_diffbind.bed" /> |
188 <output name="plots" value="out_plots.pdf" compare="sim_size" /> | |
147 <output name="binding_matrix" value="out_binding.matrix" /> | 189 <output name="binding_matrix" value="out_binding.matrix" /> |
190 <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> | |
191 <output name="rscript" value="out_rscript.txt"/> | |
192 <output name="analysis_info" value="out_analysis_info.txt" compare="sim_size" > | |
193 <assert_contents> | |
194 <has_text text="SessionInfo"/> | |
195 </assert_contents> | |
196 </output> | |
148 </test> | 197 </test> |
149 </tests> | 198 </tests> |
150 <help><![CDATA[ | 199 <help><![CDATA[ |
151 | 200 |
152 .. class:: infomark | 201 .. class:: infomark |
164 between two sample groups. It includes functions to support the processing of peak sets, | 213 between two sample groups. It includes functions to support the processing of peak sets, |
165 including overlapping and merging peak sets, counting sequencing reads overlapping intervals | 214 including overlapping and merging peak sets, counting sequencing reads overlapping intervals |
166 in peak sets, and identifying statistically significantly differentially bound sites based on | 215 in peak sets, and identifying statistically significantly differentially bound sites based on |
167 evidence of binding affinity (measured by differences in read densities). To this end it uses | 216 evidence of binding affinity (measured by differences in read densities). To this end it uses |
168 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages | 217 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages |
169 edgeR and DESeq2 ). Additionally, the package builds on Rgraphics routines to provide a | 218 edgeR and DESeq2). Additionally, the package builds on Rgraphics routines to provide a |
170 set of standardized plots to aid in binding analysis. | 219 set of standardized plots to aid in binding analysis. |
171 | 220 |
172 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of | 221 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of |
173 examples: the first focusing on the core task of obtaining differentially bound sites based on | 222 examples: the first focusing on the core task of obtaining differentially bound sites based on |
174 affinity data, the second working through the main plotting routines, the third discussing the | 223 affinity data, the second working through the main plotting routines, the third discussing the |
175 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail, | 224 use of a blocking factor, and the fourth revisiting occupancy data (peak calls) in more detail, |
176 as well as comparing the results of an occupancy-based analysis with an affinity-based one. | 225 as well as comparing the results of an occupancy-based analysis with an affinity-based one. |
177 Finally, certain technical aspects of how these analyses are accomplished are detailed. | 226 Finally, certain technical aspects of how these analyses are accomplished are detailed. |
178 | 227 |
179 Note DiffBind requires a minimum of four samples (two groups with two replicates each). | 228 Note this DiffBind tool requires a minimum of four samples (two groups with two replicates each). |
180 | 229 |
181 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | 230 ----- |
182 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
183 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | |
184 | 231 |
185 **Inputs** | 232 **Inputs** |
186 | 233 |
187 DiffBind works primarily with peaksets, which are sets of genomic intervals representing | 234 DiffBind works primarily with peaksets, which are sets of genomic intervals representing |
188 candidate protein binding sites. Each interval consists of a chromosome, a start and end | 235 candidate protein binding sites. Each interval consists of a chromosome, a start and end |
192 be associated with each peakset (one for the ChIP data, and optionally another representing | 239 be associated with each peakset (one for the ChIP data, and optionally another representing |
193 a control sample) | 240 a control sample) |
194 | 241 |
195 **Sample Information** | 242 **Sample Information** |
196 | 243 |
197 You have to specify your sample information in the tool form above. | 244 You have to specify your sample information in the tool form above, where Factor defines the groups you want to compare (e.g. Resistant and Responsive). |
198 | 245 |
199 Example: | 246 Example: |
200 | 247 |
201 ============= ========== ========== ============= ============= | 248 ============= ============= |
202 **SampleID** **Tissue** **Factor** **Condition** **Replicate** | 249 **SampleID** **Group** |
203 ------------- ---------- ---------- ------------- ------------- | 250 ------------- ------------- |
204 BT4741 BT474 ER Resistant 1 | 251 BT4741 Resistant |
205 BT4742 BT474 ER Resistant 2 | 252 BT4742 Resistant |
206 MCF71 MCF7 ER Responsive 1 | 253 MCF71 Responsive |
207 MCF72 MCF7 ER Responsive 2 | 254 MCF72 Responsive |
208 MCF73 MCF7 ER Responsive 3 | 255 ============= ============= |
209 T47D1 T47D ER Responsive 1 | |
210 T47D2 T47D ER Responsive 2 | |
211 MCF7r1 MCF7 ER Resistant 1 | |
212 MCF7r2 MCF7 ER Resistant 2 | |
213 ZR751 ZR75 ER Responsive 1 | |
214 ZR752 ZR75 ER Responsive 2 | |
215 ============= ========== ========== ============= ============= | |
216 | |
217 Or provide a sample sheet tabular file such as below. | |
218 | |
219 Example: | |
220 | |
221 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
222 SampleID Tissue Factor Condition Treatment Replicate bamReads ControlID bamControl Peaks PeakCaller | |
223 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
224 BT4741 BT474 ER Resistant Full-Media 1 Chr18_BT474_ER_1.bam BT474c Chr18_BT474_input.bam BT474_ER_1.bed.gz bed | |
225 BT4742 BT474 ER Resistant Full-Media 2 Chr18_BT474_ER_2.bam BT474c Chr18_BT474_input.bam BT474_ER_2.bed.gz bed | |
226 MCF71 MCF7 ER Responsive Full-Media 1 Chr18_MCF7_ER_1.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_1.bed.gz bed | |
227 MCF72 MCF7 ER Responsive Full-Media 2 Chr18_MCF7_ER_2.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_2.bed.gz bed | |
228 MCF73 MCF7 ER Responsive Full-Media 3 Chr18_MCF7_ER_3.bam MCF7c Chr18_MCF7_input.bam MCF7_ER_3.bed.gz bed | |
229 T47D1 T47D ER Responsive Full-Media 1 Chr18_T47D_ER_1.bam T47Dc Chr18_T47D_input.bam T47D_ER_1.bed.gz bed | |
230 T47D2 T47D ER Responsive Full-Media 2 Chr18_T47D_ER_2.bam T47Dc Chr18_T47D_input.bam T47D_ER_2.bed.gz bed | |
231 MCF7r1 MCF7 ER Resistant Full-Media 1 Chr18_TAMR_ER_1.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_1.bed.gz bed | |
232 MCF7r2 MCF7 ER Resistant Full-Media 2 Chr18_TAMR_ER_2.bam TAMRc Chr18_TAMR_input.bam TAMR_ER_2.bed.gz bed | |
233 ZR751 ZR75 ER Responsive Full-Media 1 Chr18_ZR75_ER_1.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_1.bed.gz bed | |
234 ZR752 ZR75 ER Responsive Full-Media 2 Chr18_ZR75_ER_2.bam ZR75c Chr18_ZR75_input.bam ZR75_ER_2.bed.gz bed | |
235 ======== ====== ====== ========== ========== ========= ==================== ========= ===================== ================= ========== | |
236 | 256 |
237 | 257 |
238 **Peak files** | 258 **Peak files** |
239 | 259 |
240 Result of your Peak calling experiment in bed format, one file for each sample is required. | 260 Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2. |
241 | 261 |
242 Example: | 262 Example (MACS.xls file in bed format): |
243 | 263 |
244 ======= ======= ======= =============== ======= | 264 ======= ======= ======= =============== ============== |
245 1 2 3 4 **5** | 265 1 2 3 4 **5 (Score)** |
246 ======= ======= ======= =============== ======= | 266 ======= ======= ======= =============== ============== |
247 chr18 215562 216063 MACS_peak_16037 56.11 | 267 chr18 215562 216063 MACS_peak_16037 56.11 |
248 chr18 311530 312105 MACS_peak_16038 222.49 | 268 chr18 311530 312105 MACS_peak_16038 222.49 |
249 chr18 356656 357315 MACS_peak_16039 92.06 | 269 chr18 356656 357315 MACS_peak_16039 92.06 |
250 chr18 371110 372092 MACS_peak_16040 123.86 | 270 chr18 371110 372092 MACS_peak_16040 123.86 |
251 chr18 395116 396464 MACS_peak_16041 1545.39 | 271 chr18 395116 396464 MACS_peak_16041 1545.39 |
252 chr18 399014 400382 MACS_peak_16042 1835.19 | 272 chr18 399014 400382 MACS_peak_16042 1835.19 |
253 chr18 499134 500200 MACS_peak_16043 748.32 | 273 chr18 499134 500200 MACS_peak_16043 748.32 |
254 chr18 503518 504552 MACS_peak_16044 818.30 | 274 chr18 503518 504552 MACS_peak_16044 818.30 |
255 chr18 531672 532274 MACS_peak_16045 159.30 | 275 chr18 531672 532274 MACS_peak_16045 159.30 |
256 chr18 568326 569282 MACS_peak_16046 601.11 | 276 chr18 568326 569282 MACS_peak_16046 601.11 |
257 ======= ======= ======= =============== ======= | 277 ======= ======= ======= =============== ============== |
258 | 278 |
259 * BAM file which contains the mapped sequencing reads can be associated with each peakset | 279 * BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required. |
260 * Control BAM files represent a control dataset and are optional, but have to be specified for all samples when used. | 280 * Optional: Control BAM file representing a control dataset. If used, has to be specified for all samples. Note that the DiffBind authors say control reads are best utilized prior to running DiffBind, at the peak calling stage (e.g. with MACS2) and in blacklists, see this `Bioconductor post`_. |
261 | 281 |
282 ----- | |
262 | 283 |
263 **Outputs** | 284 **Outputs** |
264 | 285 |
286 This tool outputs | |
287 | |
288 * differentially bound sites in BED, WIG or GFF format | |
289 | |
290 Optionally, under **Output Options** you can choose to output | |
291 | |
292 * a PDF of plots (Heatmap, PCA, MA, Volcano, Boxplots) | |
293 * a binding affinity matrix | |
294 * the R script used by this tool | |
295 * an RData file of the R objects generated | |
296 * a text file with information on the analysis (number of Intervals, FRiP scores, method used) | |
297 | |
298 **Differentially Bound Sites** | |
299 | |
265 As output format you can choose BED, GFF, WIG. | 300 As output format you can choose BED, GFF, WIG. |
266 | 301 |
267 Example: | 302 Example - BED format: |
268 | 303 |
269 ======== ====== =======+ | 304 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
270 seqnames ranges strand Conc Conc_Resistant | 305 seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR** |
271 | 306 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
272 2452 chr18 [64490686, 64491186] * | 6.36 1.39 | 307 chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 |
273 1291 chr18 [34597713, 34598213] * | 5.33 0.22 | 308 chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 |
274 976 chr18 [26860997, 26861497] * | 7.3 3.13 | 309 chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 |
275 2338 chr18 [60892900, 60893400] * | 7.13 1.84 | 310 chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 |
276 2077 chr18 [55569087, 55569587] * | 5.52 1.89 | 311 chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 |
277 | 312 ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== |
278 Conc_Responsive Fold p-value FDR | 313 |
279 <numeric> <numeric> <numeric> <numeric> | 314 Columns contain the following data: |
280 2452 7 -5.61 3.57e-10 1.02e-06 | 315 |
281 1291 5.97 -5.75 1.1e-09 1.57e-06 | 316 * **1st**: Chromosome name |
282 976 7.92 -4.79 1.1e-08 1.05e-05 | 317 * **2nd**: Start position of site |
283 2338 7.77 -5.93 1.68e-08 1.17e-05 | 318 * **3rd**: End position of site |
284 2077 6.13 -4.23 2.36e-08 1.17e-05 | 319 * **4th**: Length of site |
285 | 320 * **5th**: Strand |
286 The value columns show the | 321 * **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) |
287 Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) | 322 * **7th**: Mean concentration over the first (e.g. Responsive) group |
288 Conc_Resistant mean concentration over the first (Resistant) group | 323 * **8th**: Mean concentration over the second (e.g. Resistant) group |
289 Conc_Responsive mean concentration over second (Responsive) group | 324 * **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Responsive - Resistant), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group. |
290 Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group. | 325 * **10th**: P-value confidence measure for identifying these sites as differentially bound |
291 p-value confidence measure for identifying these sites as differentially bound | 326 * **11th**: a multiple testing corrected FDR p-value |
292 FDR a multiple testing corrected FDR p-value | |
293 | 327 |
294 | 328 |
295 **Binding Affinity Matrix** | 329 **Binding Affinity Matrix** |
296 | 330 |
297 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent | 331 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent |
298 differential analysis. | 332 differential analysis. |
299 | 333 |
300 Example: | 334 Example: |
301 | 335 |
302 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 336 ===== ====== ====== ================ ================ ================ ================ |
303 ID Tissue Factor Condition Treatment Replicate Caller Intervals FRiP | 337 CHR START END MCF7_ER_1.bed MCF7_ER_2.bed BT474_ER_1.bed BT474_ER_2.bed |
304 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 338 ===== ====== ====== ================ ================ ================ ================ |
305 BT4741 BT474 ER Resistant Full-Media 1 counts 2845 0.16 | 339 chr18 111567 112005 137.615208000375 59.878372946728 29.4139375878664 19.9594576489093 |
306 BT4742 BT474 ER Resistant Full-Media 2 counts 2845 0.15 | 340 chr18 189223 189652 19.9594576489093 12.6059732519427 11.5554754809475 23.110950961895 |
307 MCF71 MCF7 ER Responsive Full-Media 1 counts 2845 0.27 | 341 chr18 215232 216063 11.5554754809475 15.7574665649284 31.5149331298568 72.4843461986707 |
308 MCF72 MCF7 ER Responsive Full-Media 2 counts 2845 0.17 | 342 chr18 311530 312172 17.8584621069189 11.5554754809475 54.6258840917518 43.0704086108043 |
309 MCF73 MCF7 ER Responsive Full-Media 3 counts 2845 0.23 | 343 chr18 346464 347342 75.6358395116564 40.9694130688139 21.0099554199046 16.8079643359236 |
310 T47D1 T47D ER Responsive Full-Media 1 counts 2845 0.10 | 344 chr18 356560 357362 11.5554754809475 14.7069687939332 57.7773774047375 53.5753863207566 |
311 T47D2 T47D ER Responsive Full-Media 2 counts 2845 0.06 | 345 chr18 371110 372102 8.40398216796182 9.45447993895705 81.9388261376278 82.989323908623 |
312 MCF7r1 MCF7 ER Resistant Full-Media 1 counts 2845 0.20 | 346 chr18 394600 396513 56.7268796337423 43.0704086108043 510.541916703681 438.05757050501 |
313 MCF7r2 MCF7 ER Resistant Full-Media 2 counts 2845 0.13 | 347 chr18 399014 400382 156.524167878289 117.655750351465 558.864814169461 496.885445680743 |
314 ZR751 ZR75 ER Responsive Full-Media 1 counts 2845 0.32 | 348 chr18 498906 500200 767.913870597511 278.381909313735 196.443083176108 181.736114382174 |
315 ZR752 ZR75 ER Responsive Full-Media 2 counts 2845 0.22 | 349 ===== ====== ====== ================ ================ ================ ================ |
316 ====== ====== ====== ========== ========== ========= ====== ========= ==== | 350 |
317 | 351 ----- |
318 | |
319 | 352 |
320 **More Information** | 353 **More Information** |
321 | 354 |
322 Generally, processing data with DiffBind involves five phases: | 355 Generally, processing data with DiffBind involves five phases: |
323 | 356 |
326 #. Counting reads | 359 #. Counting reads |
327 #. Differential binding affinity analysis | 360 #. Differential binding affinity analysis |
328 #. Plotting and reporting | 361 #. Plotting and reporting |
329 | 362 |
330 | 363 |
331 * **Reading in peaksets**: | 364 **Reading in peaksets**: |
332 | 365 |
333 The first step is to read in a set of peaksets and associated | 366 The first step is to read in a set of peaksets and associated |
334 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS | 367 metadata. Peaksets are derived either from ChIP-Seq peak callers, such as **MACS2**, or using some other criterion (e.g. genomic windows, or all the promoter regions |
335 ([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions | 368 in a genome). A single experiment can have more than |
336 in a genome). The easiest way to read in peaksets is using a comma-separated value | |
337 (csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with | |
338 a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than | |
339 one associated peakset; e.g. if multiple peak callers are used for comparison purposes, | 369 one associated peakset; e.g. if multiple peak callers are used for comparison purposes, |
340 each sample would have more than one line in the sample sheet. Once the peaksets | 370 each sample would have more than one line in the sample sheet. Once the peaksets |
341 are read in, a merging function finds all overlapping peaks and derives a single set of | 371 are read in, a merging function finds all overlapping peaks and derives a single set of |
342 unique genomic intervals covering all the supplied peaks (a consensus peakset for the | 372 unique genomic intervals covering all the supplied peaks (a consensus peakset for the |
343 experiment). | 373 experiment). |
344 | 374 |
345 * **Occupancy analysis**: | 375 **Occupancy analysis**: |
346 | 376 |
347 Peaksets, especially those generated by peak callers, provide | 377 Peaksets, especially those generated by peak callers, provide |
348 an insight into the potential occupancy of the protein being ChIPed at specific | 378 an insight into the potential occupancy of the protein being ChIPed at specific |
349 genomic loci. After the peaksets have been loaded, it can be useful to perform some | 379 genomic loci. After the peaksets have been loaded, it can be useful to perform some |
350 exploratory plotting to determine how these occupancy maps agree with each other, | 380 exploratory plotting to determine how these occupancy maps agree with each other, |
354 overlaps to be examined, as well as functions to determine how well similar samples | 384 overlaps to be examined, as well as functions to determine how well similar samples |
355 cluster together. Beyond quality control, the product of an occupancy analysis may be | 385 cluster together. Beyond quality control, the product of an occupancy analysis may be |
356 a consensus peakset, representing an overall set of candidate binding sites to be used | 386 a consensus peakset, representing an overall set of candidate binding sites to be used |
357 in further analysis. | 387 in further analysis. |
358 | 388 |
359 * **Counting reads**: | 389 **Counting reads**: |
360 | 390 |
361 Once a consensus peakset has been derived, DiffBind can use the | 391 Once a consensus peakset has been derived, DiffBind can use the |
362 supplied sequence read files to count how many reads overlap each interval for each | 392 supplied sequence read files to count how many reads overlap each interval for each |
363 unique sample. The peaks in the consensus peakset may be re-centered and trimmed | 393 unique sample. The peaks in the consensus peakset may be re-centered and trimmed |
364 based on calculating their summits (point of greatest read overlap) in order to provide | 394 based on calculating their summits (point of greatest read overlap) in order to provide |
366 containing a (normalized) read count for each sample at every potential binding site. | 396 containing a (normalized) read count for each sample at every potential binding site. |
367 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, | 397 With this matrix, the samples can be re-clustered using affinity, rather than occupancy, |
368 data. The binding affinity matrix is used for QC plotting as well as for subsequent | 398 data. The binding affinity matrix is used for QC plotting as well as for subsequent |
369 differential analysis. | 399 differential analysis. |
370 | 400 |
371 * **Differential binding affinity analysis**: | 401 **Differential binding affinity analysis**: |
372 | 402 |
373 The core functionality of DiffBind is the | 403 The core functionality of DiffBind is the |
374 differential binding affinity analysis, which enables binding sites to be identified that | 404 differential binding affinity analysis, which enables binding sites to be identified that |
375 are statistically significantly differentially bound between sample groups. To accomplish | 405 are statistically significantly differentially bound between sample groups. To accomplish |
376 this, first a contrast (or contrasts) is established, dividing the samples into groups to | 406 this, first a contrast (or contrasts) is established, dividing the samples into groups to |
377 be compared. Next the core analysis routines are executed, by default using DESeq2. | 407 be compared. Next the core analysis routines are executed, by default using DESeq2. |
378 This will assign a p-value and FDR to each candidate binding site indicating confidence | 408 This will assign a p-value and FDR to each candidate binding site indicating confidence |
379 that they are differentially bound. | 409 that they are differentially bound. |
380 | 410 |
381 * **Plotting and reporting**: | 411 **Plotting and reporting**: |
382 | 412 |
383 Once one or more contrasts have been run, DiffBind provides | 413 Once one or more contrasts have been run, DiffBind provides |
384 a number of functions for reporting and plotting the results. MA plots give an | 414 a number of functions for reporting and plotting the results. MA plots give an |
385 overview of the results of the analysis, while correlation heatmaps and PCA plots show | 415 overview of the results of the analysis, while correlation heatmaps and PCA plots show |
386 how the groups cluster based on differentially bound sites. Boxplots show the distribution | 416 how the groups cluster based on differentially bound sites. Boxplots show the distribution |
387 of reads within differentially bound sites corresponding to whether they gain or | 417 of reads within differentially bound sites corresponding to whether they gain or |
388 lose affinity between the two sample groups. A reporting mechanism enables differentially | 418 lose affinity between the two sample groups. A reporting mechanism enables differentially |
389 bound sites to be extracted for further processing, such as annotation, motif, and | 419 bound sites to be extracted for further processing, such as annotation, motif, and |
390 pathway analyses. | 420 pathway analyses. |
391 | 421 |
422 ----- | |
423 | |
392 **References** | 424 **References** |
393 | 425 |
394 DiffBind Authors: Rory Stark, Gordon Brown (2011) | 426 DiffBind Authors: Rory Stark, Gordon Brown (2011) |
395 Wrapper authors: Bjoern Gruening, Pavankumar Videm | 427 Wrapper authors: Bjoern Gruening, Pavankumar Videm |
428 | |
429 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
430 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html | |
431 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf | |
432 .. _`Bioconductor post`: https://support.bioconductor.org/p/69924/ | |
396 | 433 |
397 ]]> | 434 ]]> |
398 </help> | 435 </help> |
399 <citations> | 436 <citations> |
400 <citation type="doi">doi:10.1038/nature10730</citation> | 437 <citation type="doi">doi:10.1038/nature10730</citation> |