changeset 0:3cb8c96f8c3c draft default tip

Uploaded
author sauria
date Fri, 17 Jan 2020 16:16:13 +0000
parents
children
files cooler_balance.xml
diffstat 1 files changed, 146 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cooler_balance.xml	Fri Jan 17 16:16:13 2020 +0000
@@ -0,0 +1,146 @@
+<tool id="cooler_balance" name="Cooler balance" version="0.8.5+galaxy1">
+  <description>perform matrix balancing on a cooler file</description>
+  <requirements>
+    <requirement type="package" version="0.8.5">cooler</requirement> <!-- same upstream version as the 0.8.5 prefix of the tool's version attribute -->
+  </requirements>
+  <command detect_errors="aggressive"><![CDATA[
+## Balance a copy, never the input itself. NOTE(review): a filter left at 0 is omitted below, so cooler's built-in default applies - confirm that matches "0 means no filtering".
+cp '$input' '$output' && cooler balance '$output' -f --tol $tolerance --max-iters $maxiter $reads
+#if str($madmax) != "0":
+  --mad-max $(madmax)
+#end if
+#if str($minnnz) != "0":
+  --min-nnz $(minnnz)
+#end if
+#if str($mincount) != "0":
+  --min-count $(mincount)
+#end if
+#if str($ignorediag) != "0":
+  --ignore-diags $(ignorediag)
+#end if
+#if str($ignoredist) != "0":
+  --ignore-dist $(ignoredist)
+#end if
+#if str($blacklist) != "None":
+  --blacklist '$blacklist'
+#end if
+  ]]></command>
+  <inputs> <!-- Numeric filters default to 0; the command template omits the matching flag whenever a value is 0. -->
+    <param name="input" type="data" format="cool" label="Cool file"/>
+    <param name="madmax" type="integer" label="MAD-max" value="0" help="Median absolute deviation filter">
+      <validator type="expression" message="MAD-max value must be a non-negative integer.">value >= 0</validator>
+    </param>
+    <param name="minnnz" type="integer" label="Min nonzero" value="0" help="Minimum nonzero marginal filter">
+      <validator type="expression" message="Min nonzero value must be a non-negative integer.">value >= 0</validator>
+    </param>
+    <param name="mincount" type="integer" label="Min count" value="0" help="Minimum count marginal filter">
+      <validator type="expression" message="Min count value must be a non-negative integer.">value >= 0</validator>
+    </param>
+    <param name="blacklist" type="data" format="bed" label="Blacklist" optional="true" help="BED file of regions to mask"/>
+    <param name="ignorediag" type="integer" label="Ignore diagonals" value="0" help="Number of matrix diagonals to ignore">
+      <validator type="expression" message="Ignore diagonals value must be a non-negative integer.">value >= 0</validator>
+    </param>
+    <param name="ignoredist" type="integer" label="Ignore distance" value="0" help="Interaction sizes to ignore (bp)">
+      <validator type="expression" message="Ignore distance value must be a non-negative integer.">value >= 0</validator>
+    </param>
+    <param name="tolerance" type="float" label="Convergence threshold" value="1e-5" help="Threshold of variance of the marginals for algorithm convergence">
+      <validator type="expression" message="Convergence threshold must be a non-negative decimal.">value >= 0</validator>
+    </param>
+    <param name="maxiter" type="integer" label="Max iterations" value="200" help="Maximum number of iterations">
+      <validator type="expression" message="Max iterations value must be a positive integer.">value > 0</validator>
+    </param>
+    <param name="reads" type="select" label="Interactions to use"> <!-- each option value is placed on the command line as-is via $reads -->
+      <option value="">All</option>
+      <option value="--cis-only">Cis only</option>
+      <option value="--trans-only">Trans only</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="output" format="cool"/> <!-- the command copies the input here and runs cooler balance on this copy -->
+  </outputs>
+  <tests>
+    <test> <!-- balances input.cool with a blacklist; checks that bins/weight is present in the output and that stderr reports the expected variance -->
+      <param name="input" value="input.cool" ftype="cool"/>
+      <param name="minnnz" value="4"/>
+      <param name="blacklist" value="blacklist.bed" ftype="bed"/>
+      <param name="ignorediag" value="1"/>
+      <param name="tolerance" value="0.03"/>
+      <output name="output">
+        <assert_contents>
+          <has_h5_keys keys="bins,chroms,indexes,pixels,bins/weight" />
+        </assert_contents>
+      </output>
+      <assert_stderr>
+        <has_text text="variance is 0.0298029" />
+      </assert_stderr>
+    </test>
+  </tests>
+  <help><![CDATA[
+**Usage**
+
+*Cooler balance* copies a chromatin interaction cooler file and performs matrix balancing on it.
+
+
+**Options**
+
+* MAD-max: This filter ignores bins in the contact matrix whose log marginal sum is less than this number of median absolute deviations below the median log marginal sum of all of the bins in the same chromosome (0 means no filtering).
+
+* Min nonzero: This filter ignores bins in the contact matrix whose marginal number of nonzeros is less than this value (0 means no filtering).
+
+* Min count: This filter ignores bins in the contact matrix whose marginal count is less than this value (0 means no filtering).
+
+* Blacklist: This is an optional BED file denoting genomic regions to ignore during matrix balancing.
+
+* Ignore diagonals: This filter ignores cis-interactions that are less than this far from the matrix diagonal (0 means no filtering).
+
+* Ignore distance: This filter ignores interactions that are shorter than or equal to this value (0 means no filtering).
+
+* Convergence threshold: Threshold value of variance of the marginals for the algorithm to converge.
+
+* Max iterations: The maximum number of iterations to perform if convergence is not achieved.
+
+* Interactions to use: Which interactions to use for matrix balancing.
+
+  ]]></help>
+  <citations> <!-- BibTeX entry for the cooler paper (Abdennur and Mirny, Bioinformatics, 2020) -->
+    <citation type="bibtex">
+@ARTICLE{Abdennur2020-vg,
+  title    = "Cooler: scalable storage for {Hi-C} data and other genomically
+              labeled arrays",
+  author   = "Abdennur, Nezar and Mirny, Leonid A",
+  abstract = "MOTIVATION: Most existing coverage-based (epi)genomic datasets
+              are one-dimensional, but newer technologies probing interactions
+              (physical, genetic, etc.) produce quantitative maps with
+              two-dimensional genomic coordinate systems. Storage and
+              computational costs mount sharply with data resolution when such
+              maps are stored in dense form. Hence, there is a pressing need to
+              develop data storage strategies that handle the full range of
+              useful resolutions in multidimensional genomic datasets by taking
+              advantage of their sparse nature, while supporting efficient
+              compression and providing fast random access to facilitate
+              development of scalable algorithms for data analysis. RESULTS: We
+              developed a file format called cooler, based on a sparse data
+              model, that can support genomically labeled matrices at any
+              resolution. It has the flexibility to accommodate various
+              descriptions of the data axes (genomic coordinates, tracks and
+              bin annotations), resolutions, data density patterns and
+              metadata. Cooler is based on HDF5 and is supported by a Python
+              library and command line suite to create, read, inspect and
+              manipulate cooler data collections. The format has been adopted
+              as a standard by the NIH 4D Nucleome Consortium. AVAILABILITY AND
+              IMPLEMENTATION: Cooler is cross-platform, BSD-licensed and can be
+              installed from the Python package index or the bioconda
+              repository. The source code is maintained on Github at
+              https://github.com/mirnylab/cooler. SUPPLEMENTARY INFORMATION:
+              Supplementary data are available at Bioinformatics online.",
+  journal  = "Bioinformatics",
+  volume   =  36,
+  number   =  1,
+  pages    = "311--316",
+  month    =  jan,
+  year     =  2020,
+  language = "en"
+}
+    </citation>
+  </citations>
+</tool>
\ No newline at end of file