<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper: appends an FST column to a SNP table, computed between
  two populations of individuals selected from that table.
-->
<tool id="gd_add_fst_column" name="Add an FST column" version="1.0.0">
  <description>to a table</description>

  <!--
    Command line: ten positional arguments (the three input datasets, the six
    option values, and the output path), followed by one "column:name"
    argument per individual, built by pairing the SNP table's
    individual_columns and individual_names metadata.
    NOTE(review): individual names come from dataset metadata and are placed
    inside double quotes on the shell command line; confirm they cannot
    contain shell-special characters.
  -->
  <command interpreter="python">
    add_fst_column.py "$input" "$p1_input" "$p2_input" "$data_source" "$min_reads" "$min_qual" "$retain" "$discard_fixed" "$biased" "$output"
    #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
        #set $arg = '%s:%s' % ($individual_col, $individual)
        "$arg"
    #end for
  </command>

  <inputs>
    <!-- SNP table plus the two population definitions. -->
    <param name="input" type="data" format="wsf" label="SNP table" />
    <param name="p1_input" type="data" format="ind" label="Population 1 individuals" />
    <param name="p2_input" type="data" format="ind" label="Population 2 individuals" />

    <!-- How allele frequencies are estimated: 0 = read counts, 1 = genotypes. -->
    <param name="data_source" type="select" label="Data source">
      <option value="0" selected="true">sequence coverage</option>
      <option value="1">estimated genotype</option>
    </param>

    <!-- Lower bounds on the amount of data required at a SNP. -->
    <param name="min_reads" type="integer" min="0" value="0" label="Minimum total read count for a population" />
    <param name="min_qual" type="integer" min="0" value="0" label="Minimum individual genotype quality" />

    <!-- What to do with SNPs that violate the lower bounds: 0 = skip, 1 = FST of -1. -->
    <param name="retain" type="select" label="Special treatment" help="Applied to SNPs that do not meet the lower bounds above.">
      <option value="0" selected="true">Skip row</option>
      <option value="1">Set FST = -1</option>
    </param>

    <param name="discard_fixed" type="select" label="Apparently fixed SNPs">
      <option value="0">Retain SNPs that appear fixed in the two populations</option>
      <option value="1" selected="true">Delete SNPs that appear fixed in the two populations</option>
    </param>

    <!-- Despite the parameter name, value 1 selects the unbiased (Weir) estimator. -->
    <param name="biased" type="select" label="FST estimator">
      <option value="0" selected="true">Wright's original definition</option>
      <option value="1">Weir's unbiased estimator</option>
    </param>

  </inputs>

  <outputs>
    <!-- The output SNP table copies its metadata from the input table. -->
    <data name="output" format="wsf" metadata_source="input" />
  </outputs>

  <tests>
    <test>
      <param name="input" value="genome_diversity/test_in/sample.wsf" ftype="wsf" />
      <param name="p1_input" value="genome_diversity/test_in/a.ind" ftype="ind" />
      <param name="p2_input" value="genome_diversity/test_in/b.ind" ftype="ind" />
      <param name="data_source" value="0" />
      <param name="min_reads" value="3" />
      <param name="min_qual" value="0" />
      <param name="retain" value="0" />
      <param name="discard_fixed" value="1" />
      <param name="biased" value="0" />
      <output name="output" file="genome_diversity/test_out/add_fst_column/add_fst_column.wsf" />
    </test>
  </tests>

  <help>
**What it does**

The user specifies a SNP table and two "populations" of individuals,
both previously defined using the Galaxy tool to select individuals from
a SNP table.  No individual can be in both populations.  Other choices are
as follows.

Data source.  The allele frequencies of a SNP in the two populations can be
estimated either by the total number of reads of each allele, or by adding
the frequencies inferred from genotypes of individuals in the populations.

After specifying the data source, the user sets lower bounds on the amount
of data required at a SNP.  For estimating the Fst using read counts,
the bound is the minimum count of reads of the two alleles in a population.
For estimations based on genotype, the bound is the minimum reported genotype
quality per individual.

The user specifies whether the SNPs that violate the lower bound should be
ignored or the Fst set to -1.

The user specifies whether SNPs where both populations appear to be fixed
for the same allele should be retained or discarded.

Finally, the user chooses which definition of Fst to use:  Wright's original
definition or Weir's unbiased estimator.

A column is appended to the SNP table giving the Fst for each retained SNP.
  </help>
</tool>