changeset 4:884befe75dcb

Uploaded
author rico
date Thu, 05 Apr 2012 15:09:12 -0400
parents 0b4758202a61
children feeeee14729e
files evaluate_population_numbers.xml
diffstat 1 files changed, 54 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/evaluate_population_numbers.xml	Thu Apr 05 15:09:12 2012 -0400
@@ -0,0 +1,54 @@
+<tool id="gd_evaluate_population_numbers" name="Evaluate" version="1.0.0">
+  <description>possible numbers of populations</description>
+  <!-- Runs evaluate_population_numbers.bash through bash with three arguments: the admix.ped file in the input dataset's extra files directory, the output dataset path, and the maximum number of populations. -->
+  <command interpreter="bash">
+    evaluate_population_numbers.bash "${input.extra_files_path}/admix.ped" "$output" "$max_populations"
+  </command>
+  <!-- Inputs: a dataset in wped format, and an integer upper bound on the number of populations (minimum 1, default 5). -->
+  <inputs>
+    <param name="input" type="data" format="wped" label="Dataset" />
+    <param name="max_populations" type="integer" min="1" value="5" label="Maximum number of populations" />
+  </inputs>
+  <!-- Output: a single dataset in txt format, written to the path passed to the command as $output. -->
+  <outputs>
+    <data name="output" format="txt" />
+  </outputs>
+  <!-- Functional test: a composite wped input (base name "admix") assembled from three fixture files, run with max_populations set to 2; the output is compared against a stored expected text file. -->
+  <tests>
+    <test>
+      <param name="input" value="fake" ftype="wped">
+        <metadata name="base_name" value="admix" />
+        <composite_data value="genome_diversity/test_out/prepare_population_structure/prepare_population_structure.html" />
+        <composite_data value="genome_diversity/test_out/prepare_population_structure/admix.ped" />
+        <composite_data value="genome_diversity/test_out/prepare_population_structure/admix.map" />
+        <edit_attributes type="name" value="fake" />
+      </param>
+      <param name="max_populations" value="2" />
+
+      <output name="output" file="genome_diversity/test_out/evaluate_population_numbers/evaluate_population_numbers.txt" />
+    </test>
+  </tests>
+
+  <help>
+**What it does**
+
+The user selects a set of data generated by the Galaxy tool to "prepare
+to look for population structure".  For all possible numbers K of ancestral
+populations, from 1 up to a user-specified maximum, this tool produces values
+that indicate how well the data can be explained as genotypes from individuals
+derived from K ancestral populations.  These values are computed by a 5-fold
+cross-validation procedure, so that a good choice for K will exhibit a low
+cross-validation error compared with other potential settings for K.
+
+**Acknowledgments**
+
+We use the program "Admixture", downloaded from
+
+http://www.genetics.ucla.edu/software/admixture/
+
+and described in the paper "Fast model-based estimation of ancestry in
+unrelated individuals" by David H. Alexander, John Novembre and Kenneth Lange,
+Genome Research 19 (2009), pp. 1655-1664. Admixture is called with the "--cv"
+flag to produce these values.
+  </help>
+</tool>