Mercurial > repos > veg > tn93

<?xml version="1.0"?>
<tool id="tn93" version="1.0.0" name="TN93">
  <!-- Short description shown next to the tool name; intentionally left empty. -->
  <description></description>
  <!-- Software dependency resolved for the job: the tn93 package, pinned to 1.0.4. -->
  <requirements>
    <requirement version="1.0.4" type="package">tn93</requirement>
  </requirements>
  <!-- Exit codes of 1 and above are matched by this rule; no explicit level is
       given, so Galaxy's default level for exit_code rules applies. -->
  <stdio>
    <exit_code range="1:"/>
  </stdio>
  <!-- No command is configured for reporting the underlying tool version. -->
  <version_command></version_command>
  <!--
    Command line template (Cheetah) used to run tn93.

    * The output file (-o) and the output format (-f) are always passed. The
      format selector is a top-level parameter and also drives the datatype of
      the output dataset, so it must not depend on the "advanced" switch;
      otherwise the file content and its declared datatype can disagree.
    * Threshold, ambiguity handling, fraction and overlap are passed only when
      the user chose to specify additional parameters; otherwise tn93 falls
      back to its own defaults.
    * The copy-number delimiter (-d) is passed only when non-empty. It is a
      free-text value, so it is single-quoted to keep the shell from
      interpreting it.
    * The input FASTA path is the final positional argument.
  -->
  <command><![CDATA[
        tn93 -o '$tn93_distances'
        -f $format
        #if str($options.advanced) == "advanced":
          -t $options.threshold
          -a $options.ambigs
          -g $options.fraction
          -l $options.overlap
          #if len(str($options.counts_in_name)) > 0:
            -d '$options.counts_in_name'
          #end if
        #end if
        '$input_fasta'
    ]]></command>
  <!--
    User-facing parameters.

    input_fasta : FASTA dataset handed to tn93 as its positional argument.
    format      : output format (csv, csvn or hyphy); also selects the datatype
                  of the output dataset.
    options     : "defaults" passes no tuning flags; "advanced" exposes
                  threshold (-t), ambigs (-a), fraction (-g), overlap (-l) and
                  counts_in_name (-d).

    Defaults are set with the "value" attribute. Numeric ranges mirror the
    constraints stated in the help text (threshold not below 0, fraction
    within [0,1], overlap a positive integer).

    NOTE(review): "handle" is not one of the strategies listed in the help text
    (resolve, average, skip, gapmm, or a list of ambiguity codes), yet as the
    first option it is the preselected value. It is kept so saved jobs and the
    bundled test keep working; confirm how tn93 interprets it and consider
    preselecting "resolve", the documented tn93 default.
  -->
  <inputs>
    <param name="input_fasta" type="data" format="fasta" label="Input FASTA"/>
    <param name="format" type="select" label="Output format">
      <option value="csv">csv</option>
      <option value="csvn">csvn</option>
      <option value="hyphy">hyphy</option>
    </param>
    <conditional name="options">
      <param name="advanced" type="select" label="Additional options">
        <option value="defaults">Use defaults</option>
        <option value="advanced">Specify additional parameters</option>
      </param>
      <when value="defaults"/>
      <when value="advanced">
        <param name="threshold" type="float" value="0.015" min="0" label="THRESHOLD"
               help="Only report (count) distances below this threshold (must not be negative)."/>
        <param name="ambigs" type="select" label="AMBIGS"
               help="Strategy for handling ambiguous nucleotides (tn93 itself defaults to resolve).">
          <option value="handle">handle</option>
          <option value="resolve">resolve</option>
          <option value="average">average</option>
          <option value="skip">skip</option>
          <option value="gapmm">gapmm</option>
        </param>
        <param name="fraction" type="float" value="1.0" min="0" max="1" label="FRACTION"
               help="Maximum tolerated fraction of ambiguous characters, between 0 and 1; used in combination with AMBIGS."/>
        <param name="overlap" type="integer" value="100" min="1" label="OVERLAP"
               help="Only process pairs of sequences that overlap over at least this many nucleotides."/>
        <param name="counts_in_name" type="text" label="COUNTS_IN_NAME"
               help="If a sequence name has the form 'somethingCOUNTS_IN_NAMEinteger', the integer is treated as a copy number. Leave empty to omit the -d flag."/>
      </when>
    </conditional>
  </inputs>
  <!--
    Single output dataset holding the distances written by tn93 (the file
    passed to -o). Its datatype is csv unless the "format" parameter is csvn or
    hyphy, in which case the datatype is switched to the matching name.
    NOTE(review): csvn and hyphy are assumed to be datatypes known to the
    target Galaxy instance; confirm.
  -->
  <outputs>
    <data name="tn93_distances" format="csv">
      <change_format>
        <when format="csvn" input="format" value="csvn"/>
        <when format="hyphy" input="format" value="hyphy"/>
      </change_format>
    </data>
  </outputs>
  <!--
    Functional test: runs the tool on tn93-in1.fa with the additional options
    enabled and a threshold of 0.35, then compares the csv output against
    tn93-out1.csv, tolerating up to 32 differing lines.
  -->
  <tests>
    <test>
      <!-- Disabled parameter kept from an earlier revision: -->
      <!--       <param name="history_reference" value="tn93-in-ref-1.fa"/> -->
      <param value="tn93-in1.fa" name="input_fasta"/>
      <param value="advanced" name="advanced"/>
      <param value="0.35" name="threshold"/>
      <output name="tn93_distances" ftype="csv" file="tn93-out1.csv" lines_diff="32"/>
    </test>
  </tests>
  <help><![CDATA[
		optional arguments:
			-h, --help               show this help message and exit
			-o OUTPUT                direct the output to a file named OUTPUT (default=stdout)
			-t THRESHOLD             only report (count) distances below this threshold (>=0, default=0.015)
			-a AMBIGS                handle ambiguous nucleotides using one of the following strategies (default=resolve)
															 resolve: resolve ambiguities to minimize distance (e.g.R matches A);
															 average: average ambiguities (e.g.R-A is 0.5 A-A and 0.5 G-A);
															 skip: do not include sites with ambiguous nucleotides in distance calculations;
															 gapmm: a gap ('-') matched to anything other than another gap is like matching an N (4-fold ambig) to it;
															 a string (e.g. RY): any ambiguity in the list is RESOLVED; any ambiguity NOT in the list is averaged (LIST-NOT LIST will also be averaged);
			-g FRACTION              in combination with AMBIGS, works to limit (for resolve and string options to AMBIG)
															 the maximum tolerated FRACTION of ambiguous characters; sequences whose pairwise comparisons
															 include no more than FRACTION [0,1] of sites with resolvable ambiguities will be resolved
															 while all others will be AVERAGED (default = 1.0)
			-f FORMAT                controls the format of the output unless -c is set (default=csv)
															 csv: seqname1, seqname2, distance;
															 csvn: 1, 2, distance;
															 hyphy: {{d11,d12,..,d1n}...{dn1,dn2,...,dnn}}, where distances above THRESHOLD are set to 100;
			-l OVERLAP               only process pairs of sequences that overlap over at least OVERLAP nucleotides (an integer >0, default=100):
			-d COUNTS_IN_NAME        if sequence name is of the form 'somethingCOUNTS_IN_NAMEinteger' then treat the integer as a copy number
															 when computing distance histograms (a character, default=':'):
			-s SECOND_FASTA          if specified, read another FASTA file from SECOND_FASTA and perform pairwise comparison BETWEEN the files (default=NULL)
			-b                       bootstrap alignment columns before computing distances (default = false)
															 when -s is supplied, permutes the assignment of sequences to files
															 interacts with -r option
			-r                       if -b is specified AND -s is supplied, using -r will bootstrap across sites
															 instead of allocating sequences to 'compartments' randomly
			-c                       only count the pairs below a threshold, do not write out all the pairs
			-m                       compute inter- and intra-population means suitable for FST calculations
															 only applied when -s is used to provide a second file
			-u PROBABILITY           subsample sequences with specified probability (a value between 0 and 1, default = 1.0)
			-q                       do not report progress updates and other diagnostics to stderr
			FASTA                    read sequences to compare from this file (default=stdin)
]]></help>
</tool>