view phylorelatives.xml @ 41:9453ba52f46b draft

tool now uses multiple selection for input fasta files
author boris
date Fri, 19 Jul 2013 17:46:03 -0400
parents c340422b78b1
children
line wrap: on
line source

<tool id="phylorelatives" name="Phylorelatives" version="0.0.1">
  <!-- One-line summary of the tool. -->
  <description>Relatedness of minor allele sequences in NJ tree</description>
  <!-- Packages, with pinned versions, that this tool declares as requirements. -->
  <requirements>
    <requirement type="package" version="2.15.0">R</requirement>
    <requirement type="package" version="2.2.6">rpy2</requirement>
    <requirement type="package" version="3.12.0">dendropy</requirement>
    <requirement type="package" version="3.0-8">ape</requirement>
  </requirements>
  <!--
    Command template: runs phylorelatives.py through the python interpreter.
    Every dataset selected in "sequences" is passed with its own -i flag.
    $pairwise and $major_only expand to "-p" / "-j" when checked and to an
    empty string otherwise (see their truevalue/falsevalue in the inputs).
    The -r option is emitted only when root_selector is "add_root".
    The -m option and the three long options receive the output dataset paths.
  -->
  <command interpreter="python">phylorelatives.py
     #for $sequence in ( $sequences ):
         -i "${sequence}"
     #end for
     -m "${multifasta}"
     -b "$iterations"
     $pairwise
     $major_only
     #if str($root.root_selector)=="add_root":
      -r "${root.root_fa}"
     #end if
     --relatives-out "$siblings"
     --newick-out "$newick"
     --trees-out "$plot"
  </command>
  <inputs>
    <!-- One or more FASTA datasets; the command passes each one with its own -i flag. -->
    <param name="sequences" type="data" format="fasta" multiple="true" label="FASTA files" help="Minor allele sequences must be labeled &quot;_minor&quot; or &quot;_test&quot; e.g. &quot;&gt;sample1_minor&quot;"/>
    <!-- Optional rooting: the root FASTA input belongs to the "add_root" branch, and the command emits -r only for that branch. -->
    <conditional name="root">
      <param name="root_selector" type="select" label="Root sequence">
        <!-- The value is what the command template tests; the element text is the label shown to the user. -->
        <option value="unrooted" selected="true">None (unrooted tree)</option>
        <option value="add_root">Add a root sequence</option>
      </param>
      <when value="unrooted"/>
      <when value="add_root">
        <param name="root_fa" type="data" format="fasta" label="Root tree using" help="Must have the same length as input sequences"/>
      </when>
    </conditional>
    <!-- Expands to -p when checked, otherwise to an empty string. -->
    <param name="pairwise" type="boolean" truevalue="-p" falsevalue="" checked="false" label="Use pairwise deletion of gaps/missing data" help="Defaults to &quot;Complete deletion&quot;"/>
    <!-- Passed with -b; a negative replicate count is meaningless, so 0 is the lower bound. -->
    <param name="iterations" type="integer" value="1000" min="0" label="Number of bootstrap replicates" help="0 to deactivate"/>
    <!-- Expands to -j when checked, otherwise to an empty string. -->
    <param name="major_only" type="boolean" truevalue="-j" falsevalue="" checked="false" label="Major-alleles-only mode" help="All sequences will be treated as major allele sequences. Useful to generate background tree"/>
  </inputs>
  <!-- The four datasets written by the script; each name matches a $variable used in the command template. -->
  <outputs>
    <data name="multifasta" format="fasta" label="${tool.name} on ${on_string}: multifasta used"/>
    <data name="siblings" format="tabular" label="${tool.name} on ${on_string}: relatives list"/>
    <data name="newick" format="txt" label="${tool.name} on ${on_string}: newick string"/>
    <data name="plot" format="png" label="${tool.name} on ${on_string}: tree plot"/>
  </outputs>
  <!-- Functional test: runs on test.fa with iterations set to 0 and compares the relatives table with the stored file. -->
  <tests>
    <test>
      <param name="sequences" value="test.fa"/>
      <param name="iterations" value="0"/>
      <output name="siblings" file="test.fa-siblings.tab"/>
    </test>
  </tests>

  <help>
    


The major and minor allele sequences of a sample are expected to cluster together in a phylogenetic tree.
Deviation from expectation suggests potential contamination coming from the closest unrelated samples.

-----

.. class:: infomark

**What it does**

Constructs relatedness of a set of sequences based on the pairwise proportion of different sites.
One or more test sequences are accepted as long as their names include the strict suffix "_minor" or "_test" (e.g. >seq1_minor).
It returns the FASTA multiple alignment used, and reports a table with the closest major allele relatives, the tree plot and newick string of the tree.

-----

.. class:: warningmark

**Note**

This tool DOES NOT align the sequences.
Consequently, homologous sequences of the same length are required as input, so that a FASTA multiple alignment can be built by concatenating the individual FASTA files.

-----

.. class:: infomark

**About formats**

*FASTA multiple alignment*

See http://www.bioperl.org/wiki/FASTA_multiple_alignment_format

*Newick*

http://www.megasoftware.net/WebHelp/glossary/rh_newick_format.htm

-----

**Example**

- For the multiple alignment composed of the following FASTA files::

   >sample1_major
   >sample1_minor
   >sample2_major
   >sample3_major
   >sample4_major
  

- running this tool with *root = RSRS.fasta* and default parameters will return four datasets::
 
   1. multiple alignment file used for running the tool
   2. relatives of sample1_minor
   3. NJ tree newick string (rooted on RSRS)
   4. NJ tree png plot (rooted on RSRS)

-----

**Citation**

If you use this tool, please cite Dickins B, Rebolledo-Jaramillo B, et al. *In preparation.*
(boris-at-bx.psu.edu)

  </help>

</tool>