view discosnp_pp.xml @ 0:91f0ee4e7f48 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/colibread commit a213833526146246d277ec7851165971449b501e
author iuc
date Fri, 20 Oct 2017 03:12:42 -0400
parents
children 7fe259133cb0
line wrap: on
line source

<?xml version='1.0' encoding='utf-8'?>
<tool profile="16.04" id="discosnp_pp" name="DiscoSnp++" version="2.2.10">
    <description>is an efficient tool for detecting SNPs without a reference genome.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="2.2.10">discosnp</requirement>
    </requirements>
    <!--
        Command template (Cheetah).
        1. The discosnp_*_reads tokens (expected to be defined in macros.xml;
           presumably they stage the reads and build the input.lst file given
           to -r, to be confirmed against the macros) are expanded according
           to the selected input type.
        2. When mapping against a reference is requested, the reference dataset
           is linked into the job working directory under a name that carries
           its datatype extension.
        3. run_discoSnp++.sh is called with the user supplied parameters; the
           -G / -M / -R options are only added in reference mode.
    -->
    <command><![CDATA[
        ## single-end reads only
        #if str( $input_type_options.input_type) == "single"
            @discosnp_single_reads@
        ## paired-end reads only
        #else if str( $input_type_options.input_type) == "paired"
            @discosnp_paired_reads@
        ## both single-end and paired-end reads
        #else
            @discosnp_single_reads@
            @discosnp_paired_reads@
        #end if

        #if str($VCF_option.mapping) == 'reference'
            ## Link the reference under a fixed, relative name so that the link
            ## (and anything the pipeline writes next to it) stays in the job
            ## working directory instead of Galaxy's dataset directory, which
            ## may be read-only.
            #set $reference_file = 'reference.' + str($VCF_option.G.ext)
            ln -sf '${VCF_option.G}' '${reference_file}' &&
        #end if

        run_discoSnp++.sh
        -r input.lst
        -b ${b}
        -D ${D}
        -P ${P}
        ${low_complexity}
        -k ${k}
        ${t}
        ${T}

        ## minimal coverage: automatic, or the user supplied per read set list
        #if str($coverage_options_type.coverage_options) == 'auto'
            -c auto
        #else
            -c '${coverage_options_type.c}'
        #end if
        -C ${C}
        -d ${d}

        ## reference-only options
        #if str($VCF_option.mapping) == 'reference'
            -G '${reference_file}'
            -M ${VCF_option.M}
            ${VCF_option.R}
        #end if

    ]]></command>

  <inputs>

      <!-- Read sets: single-end files, a list of paired-end collections, or both. -->
      <conditional name="input_type_options">
          <param name="input_type" type="select" label="Input options">
              <option value="single">Single end reads</option>
              <option value="paired">Paired end reads</option>
              <option value="mix">Both single and paired reads</option>
          </param>
          <when value="single">
              <param name="list_reads" argument="-r" format="fasta,fastq" type="data" multiple="true" label="Single read files" />
          </when>
          <when value="paired">
              <!-- "multiple" is not a data_collection attribute, so it is not set here -->
              <param name="list_paired_reads" argument="-r" format="fasta,fastq" type="data_collection" collection_type="list:paired" label="List of paired read files" />
          </when>
          <when value="mix">
              <param name="list_reads" argument="-r" format="fasta,fastq" type="data" multiple="true" label="Single read files" />
              <param name="list_paired_reads" argument="-r" format="fasta,fastq" type="data_collection" collection_type="list:paired" label="List of paired read files" />
          </when>
      </conditional>

      <!-- Variant detection parameters; each one is passed as the option named in "argument". -->
      <param argument="-b" type="select" label="Branching strategy">
          <option value="0">variants for which any of the two paths is branching are discarded</option>
          <option value="1">forbid SNPs for which the two paths are branching</option>
          <option value="2">No limitation on branching</option>
      </param>

      <param argument="-D" type="integer" min="0" label="Deletion size" value="0" help="If different from 0, discoSnp++ will search for deletions of size from 1 to D included" />
      <param argument="-P" type="integer" min="1" label="Maximum SNPs per bubble" value="1" help="discoSnp++ will search up to P SNPs in a unique bubble" />
      <param name="low_complexity" type="boolean" checked="false" truevalue="-l" falsevalue="" label="Remove low complexity bubbles" />
      <param argument="-k" type="integer" min="1" label="Size of kmers" value="31" />

      <param argument="-t" type="boolean" checked="false" truevalue="-t" falsevalue="" label="Extends each polymorphism with left and right unitigs" />
      <param argument="-T" type="boolean" checked="false" truevalue="-T" falsevalue="" label="Extends each polymorphism with left and right contigs" />

      <!-- Minimal coverage: "auto" passes "-c auto", "custom" passes the text entered below. -->
      <conditional name="coverage_options_type">
          <param name="coverage_options" type="select" label="Coverage option">
              <option value="auto">Automatic</option>
              <option value="custom">Custom</option>
          </param>
          <when value="auto" />
          <when value="custom">
              <param argument="-c" type="text" label="Minimal coverage per read set" value="4" help="e.g. 4 / 4,5,17 / 4,auto,auto">
                  <!-- the value is placed on the command line, so only the documented forms are accepted -->
                  <validator type="regex" message="Expected a comma-separated list of integers and/or 'auto', e.g. 4,auto,auto">^(auto|[0-9]+)(,(auto|[0-9]+))*$</validator>
              </param>
          </when>
      </conditional>

      <param argument="-C" type="integer" min="1" label="Maximal coverage per read set" value="2147483647" help="default value = 2^31-1" />
      <param argument="-d" type="integer" min="0" label="Max number of errors per read" value="1" help="Max number of errors per read" />

      <!-- Optional mapping of the results on a reference genome. -->
      <conditional name="VCF_option">
          <param name="mapping" type="select" label="VCF option">
              <option value="default">Do not use reference genome</option>
              <option value="reference">Mapping with a reference genome</option>
          </param>
          <when value="default" />
          <when value="reference">
              <param argument="-G" type="data" format="fasta,fastq" label="Reference genome file" />
              <param argument="-M" type="integer" min="0" value="4" label="Maximal number of mapping errors" help="during BWA mapping phase" />
              <param argument="-R" type="boolean" truevalue="-R" falsevalue="" checked="false" label="Use the reference genome also in the variant calling, not only for mapping results" />
          </when>
      </conditional>

  </inputs>

  <outputs>
      <!-- Both datasets are picked up from the job working directory by file
           name pattern (files ending in _coherent.vcf and _coherent.fa). -->
      <data name="vcf" from_work_dir="*_coherent.vcf" format="vcf" label="VCF with ${tool.name} on $on_string"/>
      <data name="fasta" from_work_dir="*_coherent.fa" format="fasta" label="Multifasta with ${tool.name} on $on_string"/>
  </outputs>

  <tests>
      <!-- Single-end mode: two FASTA read files with k=25. Both outputs are
           compared with stored expected files; the VCF comparison tolerates
           up to 2 differing lines. -->
      <test>
          <conditional name="input_type_options">
              <param name="input_type" value="single"/>
              <param name="list_reads" value="discosnp/reads1,discosnp/reads2" ftype="fasta" />
          </conditional>
          <param name="k" value="25"/>
          <output name="fasta" file="discosnp/multifasta.fa"/>
          <output name="vcf" file="discosnp/vcf_file.vcf" compare="diff" lines_diff="2"/>
      </test>
      <!-- Paired-end mode: a list:paired collection holding one pair, with
           mapping on a reference enabled (reads1 is reused as the reference
           file). The VCF comparison tolerates up to 2 differing lines. -->
      <test>
          <conditional name="input_type_options">
              <param name="input_type" value="paired"/>
              <param name="list_paired_reads">
                  <collection type="list:paired">
                      <element name="Pair1">
                          <collection type="paired">
                              <element name="forward" value="discosnp/reads1" ftype="fasta"/>
                              <element name="reverse" value="discosnp/reads2" ftype="fasta"/>
                          </collection>
                      </element>
                  </collection>
              </param>
          </conditional>
          <conditional name="VCF_option">
              <param name="mapping" value="reference"/>
              <param name="G" value="discosnp/reads1" ftype="fasta"/>
          </conditional>
          <output name="fasta" file="discosnp/multifasta_paired.fa"/>
          <output name="vcf" file="discosnp/vcf_file_paired.vcf" compare="diff" lines_diff="2"/>
      </test>
  </tests>

  <help><![CDATA[

**Description**

DiscoSnp is designed for discovering Single Nucleotide Polymorphisms (SNPs) from raw set(s) of reads obtained with Next Generation Sequencers (NGS).
Note that the number of input read sets is not constrained: it can be one, two, or more. Note also that no other data, such as a reference genome or annotations, are needed.
The software is composed of two modules. The first module, kissnp2, detects SNPs from read sets. The second module, kissreads, enhances the kissnp2 results by computing, per read set and for each SNP found, i/ its mean read coverage and ii/ the (phred) quality of the reads generating the polymorphism.

Note that from release of DiscoSnp++-2.0.6, the tool also detects close SNPs and indels.

-------

.. class:: warningmark

**Input parameters**

- Sequence files in FASTA or FASTQ format; each allele will be counted in each file individually

- Use collections: data list and/or data list paired

- FASTA sequence of a genome, in case you want to map the sequence extensions on a reference in order to get a compliant VCF

-------

.. class:: warningmark

**Output parameters**

- VCF file with coordinates on the higher branch sequences, or on a reference genome if provided

- Fasta file with sequence extensions around the SNP.


-------

**Web site**

https://colibread.inria.fr/software/discosnp/

  ]]></help>
    <!-- Expands the "citations" macro (presumably defined in the imported
         macros.xml; confirm) and passes this tool's DOI citation as its
         nested content. -->
    <expand macro="citations">
        <citation type="doi">10.1093/nar/gku1187</citation>
    </expand>
</tool>