# HG changeset patch # User rico # Date 1333652826 14400 # Node ID e49ab320694a3d722651f1a9a9ec6b069fbe6e69 # Parent 3cc35686acfb6b7c2eac83d48e86545dff1bdf27 Uploaded diff -r 3cc35686acfb -r e49ab320694a average_fst.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/average_fst.xml Thu Apr 05 15:07:06 2012 -0400 @@ -0,0 +1,112 @@ + + of two populations + + + average_fst.py "$input" "$p1_input" "$p2_input" "$data_source.ds_choice" "$data_source.min_value" "$discard_fixed" "$biased" "$output" + #if $use_randomization.ur_choice == '1' + "$use_randomization.shuffles" "$use_randomization.p0_input" + #else + "0" "/dev/null" + #end if + #for $individual, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) + #set $arg = '%s:%s' % ($individual_col, $individual) + "$arg" + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +The user specifies a SNP table and two "populations" of individuals, +both previously defined using the Galaxy tool to select individuals from +a SNP table. No individual can be in both populations. Other choices are +as follows. + +Data source. The allele frequencies of a SNP in the two populations can be +estimated either by the total number of reads of each allele, or by adding +the frequencies inferred from genotypes of individuals in the populations. + +After specifying the data source, the user sets lower bounds on amount +of data required at a SNP. For estimating the Fst using read counts, +the bound is the minimum count of reads of the two alleles in a population. +For estimations based on genotype, the bound is the minimum reported genotype +quality per individual. SNPs not meeting these lower bounds are ignored. + +The user specifies whether SNPs where both populations appear to be fixed +for the same allele should be retained or discarded. 
+ +The user chooses which definition of Fst to use: Wright's original definition +or Weir's unbiased estimator. + +Finally, the user decides whether to use randomizations. If so, then the +user specifies how many randomly generated population pairs (retaining +the numbers of individuals of the originals) to generate, as well as the +"population" of additional individuals (not in the first two populations) +that can be used in the randomization process. + +The program prints the average Fst for the original populations and the +number of SNPs used to compute it. If randomizations were requested, +it prints the average Fst for each randomly generated population pair, +ending with a summary that includes the maximum and average values, and the +highest-scoring population pair. + +