changeset 14:32be11207108

Deleted selected files
author rico
date Thu, 05 Apr 2012 15:18:43 -0400
parents 52ca162447b8
children 28049f4eb9f4
files pathway_image.xml pca.xml
diffstat 2 files changed, 0 insertions(+), 110 deletions(-) [+]
line wrap: on
line diff
--- a/pathway_image.xml	Thu Apr 05 15:17:32 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-<tool id="gd_pathway_image" name="Generate" version="1.0.0">
-  <description>KEGG pathway images</description>
-
-  <command interpreter="python">
-    mkpthwpng.py
-      "--input=${input}"
-      "--output=${output}"
-      "--KEGGpath=${pathway}"
-      "--posKEGGclmn=${input.metadata.kegg_path}"
-      "--KEGGgeneposcolmn=${input.metadata.kegg_gene}"
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="wpf" label="Table">
-      <validator type="metadata" check="kegg_gene,kegg_path" message="Missing KEGG gene code column and/or KEGG pathway code/name column metadata.  Click the pencil icon in the history item to edit/save the metadata attributes" />
-    </param>
-    <param name="pathway" type="select">
-      <options from_file="gd.pathways.txt">
-        <column name="value" index="1"/>
-        <column name="name" index="2"/>
-        <filter type="data_meta" ref="input" key="dbkey" column="0" separator="\t" />
-      </options>
-    </param>
-  </inputs>
-
-  <outputs>
-    <data name="output" format="png" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name="input" value="genome_diversity/test_in/sample.wpf" ftype="wpf" />
-      <param name="pathway" value="cfa05214" />
-      <output name="output" file="genome_diversity/test_out/pathway_image/pathway_image.png" compare="sim_size" delta = "10000" />
-    </test>
-  </tests>
-
-  <help>
-**What it does**
-
-This tool produces an image of an input KEGG pathway, highlighting the
-modules representing genes in an input list.  NOTE:  a given gene can
-be assigned to multiple modules, and different genes can be assigned to
-the same module.
-  </help>
-</tool>
--- a/pca.xml	Thu Apr 05 15:17:32 2012 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,64 +0,0 @@
-<tool id="gd_pca" name="PCA" version="1.0.0">
-
-  <command interpreter="python">
-    pca.py "$input" "$input.extra_files_path" "$output" "$output.extra_files_path"
-  </command>
-
-  <inputs>
-    <param name="input" type="data" format="wped" label="Dataset" />
-  </inputs>
-
-  <outputs>
-    <data name="output" format="html" />
-  </outputs>
-
-  <tests>
-    <test>
-      <param name='input' value='fake' ftype='wped' >
-        <metadata name='base_name' value='admix' />
-        <composite_data value='genome_diversity/test_out/prepare_population_structure/prepare_population_structure.html' />
-        <composite_data value='genome_diversity/test_out/prepare_population_structure/admix.ped' />
-        <composite_data value='genome_diversity/test_out/prepare_population_structure/admix.map' />
-        <edit_attributes type='name' value='fake' />
-      </param>
-
-      <output name="output" file="genome_diversity/test_out/pca/pca.html" ftype="html" compare="diff" lines_diff="2">
-        <extra_files type="file" name='admix.geno' value="genome_diversity/test_out/pca/admix.geno" />
-        <extra_files type="file" name='admix.ind' value="genome_diversity/test_out/pca/admix.ind" />
-        <extra_files type="file" name='admix.snp' value="genome_diversity/test_out/pca/admix.snp" />
-        <extra_files type="file" name='coordinates.txt' value="genome_diversity/test_out/pca/coordinates.txt" />
-        <extra_files type="file" name='explained.txt' value="genome_diversity/test_out/pca/explained.txt" />
-        <extra_files type="file" name='par.admix' value="genome_diversity/test_out/pca/par.admix" compare="diff" lines_diff="10" />
-        <extra_files type="file" name='PCA.pdf' value="genome_diversity/test_out/pca/PCA.pdf" compare="sim_size" delta = "1000" />
-      </output>
-      
-    </test>
-  </tests>
-
-  <help>
-**What it does**
-
-The user selects a set of data generated by the Galaxy tool to "prepare to look for population structure". The PCA tool runs a Principal Component Analysis on the input genotype data and constructs a plot of the top two principal components. It also reports the following estimates of the statistical significance of the analysis.
-
-1. Average divergence between each pair of populations.  Specifically, from the covariance matrix X whose eigenvectors were computed, we can compute a "distance", d, for each pair of individuals (i,j): d(i,j) = X(i,i) + X(j,j) - 2X(i,j).  For each pair of populations (a,b) now define an average distance: D(a,b) = \sum d(i,j) (i in pop a, j in pop b) / (\|pop a\| * \|pop b\|).  We then normalize D so that the diagonal has mean 1 and report it.
-
-2. Anova statistics for population differences along each eigenvector. For each eigenvector, a P-value for statistical significance of differences between each pair of populations along that eigenvector is printed.  +++ is used to highlight P-values less than 1e-06.  \*\*\* is used to highlight P-values between 1e-06 and 1e-03.  If there are more than 2 populations, then an overall P-value is also printed for that eigenvector, as are the populations with minimum (minv) and maximum (maxv) eigenvector coordinate. [If there is only 1 population, no Anova statistics are printed.] 
-
-3. Statistical significance of differences between populations. For each pair of populations, the above Anova statistics are summed across eigenvectors. The result is approximately chisq with d.o.f. equal to the number of eigenvectors. The chisq statistic and its p-value are printed. [If there is only 1 population, no statistics are printed.]
-
-We post-process the output of the PCA tool to estimate "admixture fractions".  For this, we take three populations at a time and determine each one's average point in the PCA plot (by separately averaging first and second coordinates).  For each combination of two center points, modeling two ancestral populations, we try to model the third central point as having a certain fraction, r, of its SNP genotypes from the second ancestral population and the remainder from the first ancestral population, where we estimate r.  The output file "coordinates.txt" then contains pairs of lines like
-
-projection along chord Population1 -> Population2
-  Population3: 0.12345
-
-where the number (in this case 0.12345) is the estimate of r.  Computations with simulated data suggest that the true r is systematically underestimated, perhaps giving roughly 0.6 times r.
-
-**Acknowledgments**
-
-We use the programs "smartpca" and "ploteig" downloaded from
-
-http://genepath.med.harvard.edu/~reich/Software.htm
-
-and described in the paper "Population structure and eigenanalysis" by Nick Patterson, Alkes L. Price and David Reich, PLoS Genetics, 2 (2006), e190.
-  </help>
-</tool>