view vcf_to_alignment.xml @ 4:f58178c0f00d draft

planemo upload for repository https://github.com/sanbi-sa/tools-sanbi-uwc commit 3e5c71977e50ec920d4f45be809d2528e55bff76
author sanbi-uwc
date Mon, 15 Oct 2018 00:34:44 -0400
parents 62fbd3f96b30
children 4f3f7d390382
line wrap: on
line source

<?xml version="1.0" encoding="utf-8" ?>
<tool id="vcf_to_alignment" name="Generate FASTA alignment from VCF collection" version="0.4">
  <description>Generate a multiple sequence alignment given a collection of variants and a reference sequence</description>
  <!-- Packages that must be available in the job environment, pinned to exact versions. -->
  <requirements>
    <requirement type="package" version="1.67">biopython</requirement>
    <requirement type="package" version="0.6.8">pyvcf</requirement>
    <requirement type="package" version="2.1.0">intervaltree</requirement>
  </requirements>
  <!-- Cheetah template that builds the vcf_to_msa.py command line. Every interpolated
       path or name is single-quoted so that whitespace and shell metacharacters reach
       the script literally instead of being interpreted by the shell. -->
  <command detect_errors="aggressive"><![CDATA[
    ## A reference taken from the history is linked into the job directory under a fixed name.
    #if str($reference.source) == 'history':
      ln -s '${reference.history}' reference.fasta &&
    #end if
    python '$__tool_directory__/vcf_to_msa.py'
    $remove_invariant_sites
    --vcf_files
    #for $vcf_file in $vcf_inputs:
      ## Each argument has the form <element identifier>^^^<dataset path>.
      ## The identifier is user-controlled: an apostrophe in it would close the
      ## surrounding single quotes, so it is replaced before interpolation.
      #set $sample_name = str($vcf_file.element_identifier).replace("'", "_")
      '${sample_name}^^^${vcf_file}'
    #end for
    --reference_file
    #if str($reference.source) == 'history':
      reference.fasta
    #else
      '${reference.builtin.fields.path}'
    #end if
    #if str($exclude_list.do_exclude) == 'yes':
      --exclude '${exclude_list.exclude_file}'
    #end if
    --output_file '${output_alignment}'
    ]]>
  </command>
  <inputs>
    <!-- List collection of VCF datasets; the command template passes each element's
         identifier and path to the script. -->
    <param name="vcf_inputs" type="data_collection" format="vcf" collection_type="list" label="Variants (VCF format)" />
    <!-- The truevalue/falsevalue string is inserted verbatim into the command line. -->
    <param name="remove_invariant_sites" type="boolean" truevalue="--remove_invariant" falsevalue="" label="Remove invariant sites from alignment" />
    <!-- Reference sequence: either a FASTA dataset from the history or an entry of the
         all_fasta data table. -->
    <conditional name="reference" label="Reference sequence source">
      <param name="source" type="select" label="Reference sequence source">
        <option value="history" selected="true">History</option>
        <option value="builtin">Built-in</option>
      </param>
      <when value="history">
        <param name="history" type="data" format="fasta" label="Reference sequence" />
      </when>
      <when value="builtin">
        <param name="builtin" type="select" label="Reference sequence (FASTA format)">
          <options from_data_table="all_fasta" />
          <validator type="no_options" message="No FASTA datasets are available for the selected input dataset" />
        </param>
      </when>
    </conditional>
    <!-- Optional BED file of regions to exclude; only passed to the script when
         do_exclude is "yes". -->
    <conditional name="exclude_list">
      <param name="do_exclude" type="select" label="Exclude regions listed in a BED file?">
        <option value="yes">Yes</option>
        <option value="no" selected="true">No</option>
      </param>
      <when value="yes">
        <param name="exclude_file" type="data" format="bed" label="BED file with regions to exclude" />
      </when>
      <when value="no"></when>
    </conditional>
  </inputs>
  <!-- Single FASTA dataset; the command template passes its path as the output file. -->
  <outputs>
    <data name="output_alignment"
          format="fasta"
          label="Alignment ${on_string}" />
  </outputs>
  <!-- All tests use the same three-element VCF list and a reference taken from the
       history. Conditional parameters are nested under their conditional and the
       selector value is stated explicitly, so each test does not depend on flat-name
       resolution or on the selector's default. Input datatypes are pinned with ftype
       rather than left to upload sniffing. -->
  <tests>
    <!-- Invariant sites kept, no exclusion list. -->
    <test>
      <param name="remove_invariant_sites" value="False" />
      <conditional name="exclude_list">
        <param name="do_exclude" value="no" />
      </conditional>
      <param name="vcf_inputs">
        <collection type="list">
          <element name="vcf_inputs.vcf1" value="vcf1.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf2" value="vcf2.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf3" value="vcf3.vcf" ftype="vcf" />
        </collection>
      </param>
      <conditional name="reference">
        <param name="source" value="history" />
        <param name="history" value="reference.fasta" ftype="fasta" />
      </conditional>
      <output name="output_alignment" value="output1.fasta" />
    </test>
    <!-- Invariant sites removed, no exclusion list. -->
    <test>
      <param name="remove_invariant_sites" value="True" />
      <conditional name="exclude_list">
        <param name="do_exclude" value="no" />
      </conditional>
      <param name="vcf_inputs">
        <collection type="list">
          <element name="vcf_inputs.vcf1" value="vcf1.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf2" value="vcf2.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf3" value="vcf3.vcf" ftype="vcf" />
        </collection>
      </param>
      <conditional name="reference">
        <param name="source" value="history" />
        <param name="history" value="reference.fasta" ftype="fasta" />
      </conditional>
      <output name="output_alignment" value="output2.fasta" />
    </test>
    <!-- Invariant sites removed, with a BED exclusion list. -->
    <test>
      <param name="remove_invariant_sites" value="True" />
      <conditional name="exclude_list">
        <param name="do_exclude" value="yes" />
        <param name="exclude_file" value="exclude1.bed" ftype="bed" />
      </conditional>
      <param name="vcf_inputs">
        <collection type="list">
          <element name="vcf_inputs.vcf1" value="vcf1.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf2" value="vcf2.vcf" ftype="vcf" />
          <element name="vcf_inputs.vcf3" value="vcf3.vcf" ftype="vcf" />
        </collection>
      </param>
      <conditional name="reference">
        <param name="source" value="history" />
        <param name="history" value="reference.fasta" ftype="fasta" />
      </conditional>
      <output name="output_alignment" value="output3.fasta" />
    </test>
  </tests>
  <help><![CDATA[
    This tool takes the SNPs recorded in each input VCF file, applies them to the
    reference sequence to generate a sequence containing those SNPs, and then combines
    the resulting sequences into a single FASTA-format alignment file.
    ]]>
  </help>
  <!-- No citations are declared for this tool. -->
  <citations />
</tool>