Mercurial > repos > iuc > scanpy_normalize
diff normalize.xml @ 12:0ac2f7d40040 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
author | iuc |
---|---|
date | Wed, 31 Jul 2024 18:08:37 +0000 |
parents | 51f9a8b21134 |
children | 381401225cbc |
line wrap: on
line diff
--- a/normalize.xml Wed Sep 22 21:06:01 2021 +0000 +++ b/normalize.xml Wed Jul 31 18:08:37 2024 +0000 @@ -1,9 +1,9 @@ -<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@"> - <description>with scanpy</description> - <expand macro="bio_tools"/> +<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> + <description>and impute with scanpy</description> <macros> <import>macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ @@ -17,17 +17,17 @@ #if $method.method == "pp.normalize_total" sc.pp.normalize_total( adata, - #if str($method.target_sum)!= '' + #if str($method.target_sum) != '' target_sum=$method.target_sum, #end if exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" max_fraction=$method.exclude_highly_expressed.max_fraction, #end if - #if str($method.key_added) != '' + #if $method.key_added key_added='$method.key_added', #end if - #if str($method.layers) != '' + #if $method.layers #if str($method.layers) != 'all' layers[str(x.strip()) for x in str($method.layers).split(',')], #else @@ -65,6 +65,29 @@ plot=False, copy=False) +#else if $method.method == "external.pp.magic" +sc.external.pp.magic( + adata=adata, + name_list='$method.name_list', + knn=$method.knn, + #if str($method.decay) != '' + decay=$method.decay, + #end if + #if str($method.knn_max) != '' + knn_max=$method.knn_max, + #end if + #if $method.t == -1 + t='auto', + #else + t=$method.t, + #end if + #if str($method.n_pca) != '' + n_pca=$method.n_pca, + #end if + solver='$method.solver', + knn_dist='$method.knn_dist', + random_state=$method.random_state, + copy=False) #end if @CMD_anndata_write_outputs@ @@ -79,6 +102,7 @@ <option value="pp.recipe_zheng17">Normalization and filtering as of 
Zheng et al. (2017), using 'pp.recipe_zheng17'</option> <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> + <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option> </param> <when value="pp.normalize_total"> <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> @@ -119,6 +143,29 @@ <when value="pp.recipe_seurat"> <expand macro="param_log"/> </when> + <when value="external.pp.magic"> + <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> + <option value="all_genes">All genes</option> + <option value="pca_only">PCA only</option> + </param> + <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/> + <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" + help="If not set, alpha decaying kernel is not used" /> + <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" + help="If not set, will be set to 3 * knn" /> + <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" + help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." 
/> + <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" + help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." /> + <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> + <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option> + <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to the gene space</option> + </param> + <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> + <expand macro="distance_metric_options"/> + </param> + <expand macro="param_random_state"/> + </when> </conditional> <expand macro="inputs_common_advanced"/> </inputs> @@ -126,8 +173,8 @@ <expand macro="anndata_outputs"/> </outputs> <tests> - <test> - <!-- test 0 --> + <test expect_num_outputs="2"> + <!-- test 1 --> <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.normalize_total"/> @@ -151,8 +198,8 @@ </output> <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test> - <!-- test 1 --> + <test expect_num_outputs="2"> + <!-- test 2 --> <param name="adata" value="random-randint.h5ad"/> <conditional name="method"> <param name="method" value="pp.recipe_zheng17"/> @@ -171,8 +218,8 @@ </output> <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> </test> - <test> - <!-- test 2 --> + <test expect_num_outputs="2"> + <!-- test 3 --> <param 
name="adata" value="paul15_subsample.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_weinreb17"/> @@ -199,8 +246,8 @@ </output> <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test> - <!-- test 3 --> + <test expect_num_outputs="2"> + <!-- test 4 --> <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_seurat"/> @@ -217,6 +264,53 @@ </output> <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> </test> + <test expect_num_outputs="2"> + <!-- test 5 --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="external.pp.magic"/> + <param name="name_list" value="all_genes"/> + <param name="t" value="-1"/> + <param name="n_pca" value="5"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="external.pp.magic"/> + <has_text_matching expression="name_list='all_genes'"/> + <has_text_matching expression="t='auto'"/> + <has_text_matching expression="n_pca=5"/> + </assert_contents> + </output> + <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- test 6 --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="external.pp.magic"/> + <param name="name_list" value="pca_only"/> + <param name="t" value="3"/> + <param name="n_pca" value="5"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="external.pp.magic"/> + 
<has_text_matching expression="name_list='pca_only'"/> + <has_text_matching expression="t=3"/> + <has_text_matching expression="n_pca=5"/> + </assert_contents> + </output> + <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> + <assert_stdout> + <has_text text="X_magic"/> + </assert_stdout> + </test> </tests> <help><![CDATA[ Normalize total counts per cell (`pp.normalize_per_cell`) @@ -228,7 +322,7 @@ Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) @@ -247,7 +341,7 @@ - scale to unit variance and shift to zero mean More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_zheng17.html>`__ Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) @@ -256,7 +350,7 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_weinreb17.html>`__ Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) @@ -267,7 +361,21 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 
More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_seurat.html>`__ + + +Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) +============================================================================================================ + +MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. + +The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). + +- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. +- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/stable/api/scanpy.external.pp.magic.html>`__ ]]></help> <expand macro="citations"/>