<!--
view cooler_balance.xml @ 0:3cb8c96f8c3c draft default tip

Uploaded
author sauria
date Fri, 17 Jan 2020 16:16:13 +0000
parents
children
line wrap: on
line source
-->

<tool id="cooler_balance" name="Cooler balance" version="0.8.5+galaxy1">
  <description>perform matrix balancing on a cooler file</description>
  <!-- The cooler package is pinned to 0.8.5, the same release that prefixes the tool version (0.8.5+galaxy1). -->
  <requirements>
    <requirement type="package" version="0.8.5">cooler</requirement>
  </requirements>
  <command detect_errors="aggressive"><![CDATA[
## cooler balance stores the weights inside the file it is given, so the input
## dataset is copied to the output path first and only the copy is modified.
## Paths are single-quoted so unusual characters cannot break the shell line,
## and the two steps are chained so balancing never runs after a failed copy.
cp '$input' '$output' &&
cooler balance '$output' -f --tol $tolerance --max-iters $maxiter $reads
## Each filter flag below is emitted only for a non-zero value. When a flag is
## left out, cooler uses its own built-in default for that option.
#if str($madmax) != "0":
  --mad-max $madmax
#end if
#if str($minnnz) != "0":
  --min-nnz $minnnz
#end if
#if str($mincount) != "0":
  --min-count $mincount
#end if
#if str($ignorediag) != "0":
  --ignore-diags $ignorediag
#end if
#if str($ignoredist) != "0":
  --ignore-dist $ignoredist
#end if
## The blacklist dataset is optional and renders as None when it is not set.
#if str($blacklist) != "None":
  --blacklist '$blacklist'
#end if
  ]]></command>
  <inputs>
    <!-- Cooler dataset that the command copies and then balances. -->
    <param name="input"
           type="data"
           format="cool"
           label="Cool file"/>

    <!-- Bin-level filters; each defaults to 0 and must be non-negative. -->
    <param name="madmax"
           type="integer"
           label="MAD-max"
           value="0"
           help="Median absolute deviation filter">
      <validator type="expression"
                 message="MAD-max value must be a non-negative integer.">value >= 0</validator>
    </param>
    <param name="minnnz"
           type="integer"
           label="Min nonzero"
           value="0"
           help="Minimum nonzero marginal filter">
      <validator type="expression"
                 message="Min nonzero value must be a non-negative integer.">value >= 0</validator>
    </param>
    <param name="mincount"
           type="integer"
           label="Min count"
           value="0"
           help="Minimum count marginal filter">
      <validator type="expression"
                 message="Min count value must be a non-negative integer.">value >= 0</validator>
    </param>

    <!-- Optional BED dataset of regions to mask. -->
    <param name="blacklist"
           type="data"
           format="bed"
           label="Blacklist"
           optional="true"
           help="BED file of regions to mask"/>

    <!-- Near-diagonal filters; each defaults to 0 and must be non-negative. -->
    <param name="ignorediag"
           type="integer"
           label="Ignore diagonals"
           value="0"
           help="Number of matrix diagonals to ignore">
      <validator type="expression"
                 message="Ignore diagonals value must be a non-negative integer.">value >= 0</validator>
    </param>
    <param name="ignoredist"
           type="integer"
           label="Ignore distance"
           value="0"
           help="Interaction sizes to ignore (bp)">
      <validator type="expression"
                 message="Ignore distance value must be a non-negative integer.">value >= 0</validator>
    </param>

    <!-- Convergence controls, always passed to the command as tol and max-iters. -->
    <param name="tolerance"
           type="float"
           label="Convergence threshold"
           value="1e-5"
           help="Threshold of variance of the marginals for algorithm convergence">
      <validator type="expression"
                 message="Convergence threshold must be a non-negative decimal.">value >= 0</validator>
    </param>
    <param name="maxiter"
           type="integer"
           label="Max iterations"
           value="200"
           help="Maximum number of iterations">
      <validator type="expression"
                 message="Max iterations value must be a positive integer.">value > 0</validator>
    </param>

    <!-- Each option value is inserted into the command line as-is; "All" adds nothing. -->
    <param name="reads"
           type="select"
           label="Interactions to use">
      <option value="">All</option>
      <option value="--cis-only">Cis only</option>
      <option value="--trans-only">Trans only</option>
    </param>
  </inputs>
  <!-- Single cool-format output: the command copies the input here and runs cooler balance on that copy. -->
  <outputs>
    <data name="output" format="cool"/>
  </outputs>
  <tests>
    <!-- One regression test: balance input.cool with a blacklist and a few non-default settings. -->
    <test>
      <param name="input" value="input.cool" ftype="cool"/>
      <!-- Overrides; madmax, mincount, ignoredist, maxiter and reads keep their form defaults. -->
      <param name="minnnz" value="4"/>
      <param name="blacklist" value="blacklist.bed" ftype="bed"/>
      <param name="ignorediag" value="1"/>
      <param name="tolerance" value="0.03"/>
      <!-- The output must expose the listed HDF5 keys, including bins/weight. -->
      <output name="output">
        <assert_contents>
          <has_h5_keys keys="bins,chroms,indexes,pixels,bins/weight" />
        </assert_contents>
      </output>
      <!-- The run must report this exact variance value on stderr. -->
      <assert_stderr>
        <has_text text="variance is 0.0298029" />
      </assert_stderr>
    </test>
  </tests>
  <help><![CDATA[
**Usage**

*Cooler balance* copies a chromatin interaction cooler file and performs matrix balancing on it.


**Options**

* MAD-max: This filter ignores bins in the contact matrix whose log marginal sum is less than this number of median absolute deviations below the median log marginal sum of all of the bins in the same chromosome (0 leaves the option unset, so cooler applies its built-in default of 5 rather than disabling the filter).

* Min nonzero: This filter ignores bins in the contact matrix whose marginal number of nonzeros is less than this value (0 leaves the option unset, so cooler applies its built-in default of 10 rather than disabling the filter).

* Min count: This filter ignores bins in the contact matrix whose marginal count is less than this value (0 means no filtering).

* Blacklist: This is an optional BED file denoting genomic regions to ignore during matrix balancing.

* Ignore diagonals: This filter ignores cis-interactions that are less than this far from the matrix diagonal (0 leaves the option unset, so cooler applies its built-in default of 2 diagonals rather than disabling the filter).

* Ignore distance: This filter ignores interactions that are shorter than or equal to this value (0 means no filtering).

* Convergence threshold: Threshold value of variance of the marginals for the algorithm to converge.

* Max iterations: The maximum number of iterations to perform if convergence is not achieved.

* Interactions to use: Which interactions to use for matrix balancing.

  ]]></help>
  <!-- Citation for the cooler paper (Abdennur and Mirny, Bioinformatics 36(1), 2020), given as a BibTeX entry. -->
  <citations>
    <citation type='bibtex'>
@ARTICLE{Abdennur2020-vg,
  title    = "Cooler: scalable storage for {Hi-C} data and other genomically
              labeled arrays",
  author   = "Abdennur, Nezar and Mirny, Leonid A",
  abstract = "MOTIVATION: Most existing coverage-based (epi)genomic datasets
              are one-dimensional, but newer technologies probing interactions
              (physical, genetic, etc.) produce quantitative maps with
              two-dimensional genomic coordinate systems. Storage and
              computational costs mount sharply with data resolution when such
              maps are stored in dense form. Hence, there is a pressing need to
              develop data storage strategies that handle the full range of
              useful resolutions in multidimensional genomic datasets by taking
              advantage of their sparse nature, while supporting efficient
              compression and providing fast random access to facilitate
              development of scalable algorithms for data analysis. RESULTS: We
              developed a file format called cooler, based on a sparse data
              model, that can support genomically labeled matrices at any
              resolution. It has the flexibility to accommodate various
              descriptions of the data axes (genomic coordinates, tracks and
              bin annotations), resolutions, data density patterns and
              metadata. Cooler is based on HDF5 and is supported by a Python
              library and command line suite to create, read, inspect and
              manipulate cooler data collections. The format has been adopted
              as a standard by the NIH 4D Nucleome Consortium. AVAILABILITY AND
              IMPLEMENTATION: Cooler is cross-platform, BSD-licensed and can be
              installed from the Python package index or the bioconda
              repository. The source code is maintained on Github at
              https://github.com/mirnylab/cooler. SUPPLEMENTARY INFORMATION:
              Supplementary data are available at Bioinformatics online.",
  journal  = "Bioinformatics",
  volume   =  36,
  number   =  1,
  pages    = "311--316",
  month    =  jan,
  year     =  2020,
  language = "en"
}
    </citation>
  </citations>
</tool>