view correct_barcodes.xml @ 31:9dcdd56ca7a5 draft default tip

"planemo upload for repository https://github.com/galaxyproject/dunovo commit c25493f6d77a2cf0c8c653d9e94c2bbf82cbd442-dirty"
author nick
date Wed, 16 Feb 2022 22:41:06 +0000
parents 9d28b4509c02
children
line wrap: on
line source

<?xml version="1.0"?>
<tool id="correct_barcodes" name="Du Novo: Correct barcodes" version="3.0.2">
  <description>of duplex sequencing reads</description>
  <!-- Package dependency: dunovo, pinned to 3.0.2, the same version declared on the tool element. -->
  <requirements>
    <requirement type="package" version="3.0.2">dunovo</requirement>
  </requirements>
  <!-- Asks correct.py to print its version (correct.py is presumably installed by the dunovo package; confirm). -->
  <version_command>correct.py --version</version_command>
  <!--
    Job command: three steps chained with the shell AND operator, so a failing
    step prevents the following ones from running. Failure is judged by exit code.
      1. baralign.sh is given the input dataset, a directory name (refdir) and a
         SAM file name (correct.sam). It receives the chunkmbs value from the
         Advanced section via -c, the GALAXY_SLOTS value (default 1) via -t, and
         -p only when the "Send usage data" box is checked. It appears to create
         refdir and correct.sam, since step 2 reads both (confirm in baralign.sh).
      2. correct.py is given the input dataset, refdir/barcodes.fa and correct.sam,
         plus the dist, mapq and pos thresholds and the three boolean flags; its
         stdout is captured in families.corrected.tsv.
      3. sort writes the sorted contents of that file to the output dataset.
    The backslash before ${GALAXY_SLOTS:-1} stops Cheetah from expanding it, so
    the shell resolves the variable at run time.
  -->
  <command detect_errors="exit_code"><![CDATA[
    baralign.sh -c $advanced.chunkmbs -t \${GALAXY_SLOTS:-1} -g
      #if $phone == '--phone-home':
        -p
      #end if
      '$input' refdir correct.sam
    && correct.py --galaxy $phone $check_ids $allow_no_nm_if_ns --dist $dist --mapq $mapq --pos $pos
      '$input' refdir/barcodes.fa correct.sam
      > families.corrected.tsv
    && sort families.corrected.tsv
      > '$output'
  ]]>
  </command>
  <inputs>
    <!-- Dataset handed to both baralign.sh and correct.py as their first positional argument. -->
    <param name="input"
           type="data"
           format="tabular"
           label="Input reads"
           help="with barcodes, grouped by family"/>

    <!-- Alignment filters; each is forwarded to correct.py as the option of the same name. -->
    <param name="dist"
           type="integer"
           value="3"
           min="1"
           label="Maximum differences"
           help="Only use alignments where the barcodes differ by at most this many errors. Note that raising this beyond 3 won't have an effect, because of the inherent limit in bowtie's ability to match up distant barcodes."/>
    <param name="mapq"
           type="integer"
           value="20"
           min="0"
           label="Minimum mapping quality"
           help="Only use alignments whose MAPQ is at least this."/>
    <param name="pos"
           type="integer"
           value="2"
           min="0"
           label="Maximum start offset"
           help="Ignore alignments where the start positions differ by more than this."/>

    <!--
      Boolean switches. Each expands to its truevalue or falsevalue string, which
      is placed verbatim on the correct.py command line. check_ids is inverted:
      checked adds nothing, unchecked adds the flag that disables the check.
    -->
    <param name="check_ids"
           type="boolean"
           truevalue=""
           falsevalue="--no-check-ids"
           checked="true"
           label="Check read names"
           help="Make sure reads are properly paired up. The job will fail if there is a pair of reads where their ids aren't identical (minus any ending /1 or /2)."/>
    <param name="allow_no_nm_if_ns"
           type="boolean"
           truevalue="--allow-no-nm-if-ns"
           falsevalue=""
           checked="false"
           label="Allow missing NM tags"
           help="Allow alignments with missing NM tags if the barcode has at least one N. Try checking this if you get the error &quot;Alignment missing NM tag, but likely due to N's in sequence&quot;."/>
    <!-- The command template also compares this value to decide whether baralign.sh gets -p. -->
    <param name="phone"
           type="boolean"
           truevalue="--phone-home"
           falsevalue=""
           checked="false"
           label="Send usage data"
           help="Report helpful usage data to the developer, to better understand the use cases and performance of the tool. The only data which will be recorded is the name and version of the tool, the size of the input data, the time and memory taken to process it, some performance-related parameters, and the IP address of the machine running it. Also, if the tool fails, it will report the name of the exception thrown and the line of code it occurred in. The parameters and input/output dataset names are not sent. All the reporting and recording code is available at https://github.com/NickSto/ET"/>

    <section name="advanced" title="Advanced Options" expanded="false">
      <!-- Passed to baralign.sh via -c. -->
      <param name="chunkmbs"
             type="integer"
             value="512"
             min="16"
             label="bowtie --chunkmbs"
             help="This is the number of megabytes to give each bowtie thread for storing path descriptors. If you see warnings about &quot;Exhausted best-first chunk memory&quot; in stderr, you need to increase this."/>
    </section>
  </inputs>
  <outputs>
    <!-- Receives the sorted families.corrected.tsv produced by the final step of the command. -->
    <data name="output" format="tabular"/>
  </outputs>
  <help>

**What it does**

This is for processing duplex sequencing data. This will correct duplex barcodes and create new, larger families. Errors in barcodes normally prevent them from being recognized as the same as the other barcodes in their family. Correcting these errors allows the original, full families to be reconstructed, saving reads which would otherwise be lost. This tool accomplishes this by doing an all vs. all alignment between the barcodes with bowtie. This identifies barcodes which are identical except for a few small differences.

-----

**Input**

This expects the output format of the "Make families" tool.

-----

**Output**

The output format is the same as the input format, ready to be consumed by the "Align families" tool.

  </help>
  <citations>
    <!-- BibTeX record for the Du Novo paper (Stoler et al., 2016, DOI 10.1186/s13059-016-1039-4). -->
    <citation type="bibtex">@article{Stoler2016,
      author = {Stoler, Nicholas and Arbeithuber, Barbara and Guiblet, Wilfried and Makova, Kateryna D and Nekrutenko, Anton},
      doi = {10.1186/s13059-016-1039-4},
      issn = {1474-760X},
      journal = {Genome biology},
      number = {1},
      pages = {180},
      pmid = {27566673},
      publisher = {Genome Biology},
      title = {{Streamlined analysis of duplex sequencing data with Du Novo.}},
      url = {http://www.ncbi.nlm.nih.gov/pubmed/27566673},
      volume = {17},
      year = {2016}
    }</citation>
  </citations>
</tool>