diff cluster_reduce_dimension.xml @ 12:9740d430d9f3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
author iuc
date Wed, 31 Jul 2024 18:05:14 +0000
parents afdac2e27f1b
children b43f391d2919
line wrap: on
line diff
--- a/cluster_reduce_dimension.xml	Wed Sep 22 21:02:36 2021 +0000
+++ b/cluster_reduce_dimension.xml	Wed Jul 31 18:05:14 2024 +0000
@@ -1,6 +1,5 @@
-<tool id="scanpy_cluster_reduce_dimension" name="Cluster, infer trajectories and embed" version="@galaxy_version@" profile="@profile@">
+<tool id="scanpy_cluster_reduce_dimension" name="Cluster, infer trajectories and embed" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
     <description>with scanpy</description>
-    <expand macro="bio_tools"/>
     <macros>
         <import>macros.xml</import>
         <xml name="pca_inputs">
@@ -20,7 +19,7 @@
                     <param argument="zero_center" type="boolean" truevalue="True" falsevalue="False" checked="true"
                         label="Compute standard PCA from covariance matrix?"
                         help="If not, it omits zero-centering variables (uses *TruncatedSVD* from scikit-learn), which allows to handle sparse input efficiently."/>
-                    <expand macro="svd_solver"/> 
+                    <expand macro="svd_solver"/>
                     <param argument="random_state" type="integer" value="0" label="Initial states for the optimization" help=""/>
                 </when>
             </conditional>
@@ -61,8 +60,8 @@
     use_highly_variable=$method.use_highly_variable
 ]]></token>
     </macros>
+    <expand macro="bio_tools"/>
     <expand macro="requirements">
-        <requirement type="package" version="0.7.0">louvain</requirement>
     </expand>
     <expand macro="version_command"/>
     <command detect_errors="exit_code"><![CDATA[
@@ -111,7 +110,7 @@
 #else if $method.method == 'tl.tsne'
 sc.tl.tsne(
     adata=adata,
-    #if $method.n_pcs
+    #if str($method.n_pcs) != ''
     n_pcs=$method.n_pcs,
     #end if
     perplexity=$method.perplexity,
@@ -127,7 +126,7 @@
     min_dist=$method.min_dist,
     spread=$method.spread,
     n_components=$method.n_components,
-    #if $method.maxiter
+    #if str($method.maxiter) != ''
     maxiter=$method.maxiter,
     #end if
     alpha=$method.alpha,
@@ -138,7 +137,6 @@
     copy=False)
 
 #else if $method.method == 'tl.draw_graph'
-
     #if str($method.adjacency) != 'None'
 from scipy import io
 adjacency = io.mmread('$method.adjacency')
@@ -147,18 +145,18 @@
 sc.tl.draw_graph(
     adata=adata,
     layout='$method.layout',
-#if str($method.root) != ''
+#if $method.root
     #set $root=([int(x.strip()) for x in str($method.root).split(',')])
     root=$root,
 #end if
     random_state=$method.random_state,
-    #if str($method.init_pos) != ''
+    #if $method.init_pos
     init_pos='$method.init_pos',
     #end if
     #if str($method.adjacency) != 'None'
     adjacency=adjacency,
     #end if
-    #if str($method.key_ext) != ''
+    #if $method.key_ext
     key_ext='$method.key_ext',
     #end if
     copy=False)
@@ -179,6 +177,19 @@
     min_group_size=$method.min_group_size,
     allow_kendall_tau_shift=$method.allow_kendall_tau_shift,
     copy=False)
+
+#else if $method.method == "tl.embedding_density"
+sc.tl.embedding_density(
+    adata=adata,
+    basis='$method.basis',
+#if $method.groupby
+    groupby='$method.groupby',
+#end if
+#if $method.key_added
+    key_added='$method.key_added',
+#end if
+    )
+
 #end if
 
 @CMD_anndata_write_outputs@
@@ -198,6 +209,7 @@
                 <option value="tl.draw_graph">Force-directed graph drawing, using 'tl.draw_graph'</option>
                 <option value="tl.dpt">Infer progression of cells through geodesic distance along the graph, using 'tl.dpt'</option>
                 <option value="tl.paga">Generate cellular maps of differentiation manifolds with complex topologies, using 'tl.paga'</option>
+                <option value="tl.embedding_density">Calculate the density of cells in an embedding (per condition), using 'tl.embedding_density'</option>
             </param>
             <when value="tl.louvain">
                 <conditional name="flavor">
@@ -282,21 +294,32 @@
                 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations ('adata.obs').">
                     <expand macro="sanitize_query" />
                 </param>
-                <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
+                <param argument="use_rna_velocity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
                 <param argument="model" type="select" label="PAGA connectivity model" help="">
                     <option value="v1.2">v1.2</option>
                     <option value="v1.0">v1.0</option>
                 </param>
             </when>
-        </conditional>            
+            <when value="tl.embedding_density">
+                <param argument="basis" type="text" value="umap" label="The embedding over which the density will be calculated." help="This embedded representation should be found in adata.obsm['X_[basis]']">
+                    <expand macro="sanitize_query" />
+                </param>
+                <param argument="groupby" type="text" optional="true" value="" label="Key for categorical observation/cell annotation for which densities are calculated per category." >
+                    <expand macro="sanitize_query" />
+                </param>
+                <param argument="key_added" type="text" optional="true" value="" label="Name of the .obs covariate that will be added with the density estimates.">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
         <expand macro="inputs_common_advanced"/>
     </inputs>
     <outputs>
         <expand macro="anndata_outputs"/>
     </outputs>
     <tests>
-        <test>
-            <!-- test 0 -->
+        <test expect_num_outputs="2">
+            <!-- test 1 -->
             <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.louvain"/>
@@ -326,8 +349,8 @@
             </output>
             <output name="anndata_out" file="tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 1 -->
+        <test expect_num_outputs="2">
+            <!-- test 2 -->
             <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.leiden"/>
@@ -352,8 +375,8 @@
             </output>
             <output name="anndata_out" file="tl.leiden.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 1 -->
+        <test expect_num_outputs="2">
+            <!-- test 3 -->
             <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
                 <param name="method" value="pp.pca"/>
@@ -384,8 +407,8 @@
             </output>
             <output name="anndata_out" file="pp.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.15"/>
         </test>
-        <!--<test>
-            < test 3 >
+        <test expect_num_outputs="2">
+            <!-- test 4 -->
             <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
                 <param name="method" value="pp.pca"/>
@@ -397,21 +420,24 @@
                 </conditional>
                 <param name="use_highly_variable" value="false"/>
             </conditional>
-            <assert_stdout>
-                <has_text_matching expression="sc.pp.pca"/>
-                <has_text_matching expression="data=adata"/>
-                <has_text_matching expression="n_comps=20"/>
-                <has_text_matching expression="dtype='float32'"/>
-                <has_text_matching expression="copy=False"/>
-                <has_text_matching expression="chunked=True"/>
-                <has_text_matching expression="chunk_size=50"/>
-                <has_text_matching expression="use_highly_variable=False"/>
-            </assert_stdout>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.pp.pca"/>
+                    <has_text_matching expression="data=adata"/>
+                    <has_text_matching expression="dtype='float32'"/>
+                    <has_text_matching expression="copy=False"/>
+                    <has_text_matching expression="chunked=True"/>
+                    <has_text_matching expression="chunk_size=50"/>
+                    <has_text_matching expression="use_highly_variable=False"/>
+                </assert_contents>
+            </output>
             <output name="anndata_out" file="pp.pca.krumsiek11_chunk.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        -->
-        <test>
-            <!-- test 2 -->
+        <test expect_num_outputs="2">
+            <!-- test 5 -->
             <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.pca"/>
@@ -441,8 +467,8 @@
             </output>
             <output name="anndata_out" file="tl.pca.krumsiek11.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.15"/>
         </test>
-        <test>
-            <!-- test 3 -->
+        <test expect_num_outputs="2">
+            <!-- test 6 -->
             <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.diffmap"/>
@@ -458,8 +484,8 @@
             </output>
             <output name="anndata_out" file="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 4 -->
+        <test expect_num_outputs="2">
+            <!-- test 7 -->
             <param name="adata" value="krumsiek11.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.tsne"/>
@@ -486,8 +512,8 @@
             </output>
             <output name="anndata_out" file="tl.tsne.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 5 -->
+        <test expect_num_outputs="2">
+            <!-- test 8 -->
             <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.umap"/>
@@ -524,8 +550,8 @@
                 </assert_contents>
             </output>
         </test>
-        <test>
-            <!-- test 6 -->
+        <test expect_num_outputs="2">
+            <!-- test 9 -->
             <param name="adata" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/>
             <conditional name="method">
                 <param name="method" value="tl.draw_graph"/>
@@ -544,8 +570,8 @@
             </output>
             <output name="anndata_out" file="tl.draw_graph.pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 7 -->
+        <test expect_num_outputs="2">
+            <!-- test 10 -->
             <param name="adata" value="pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/>
             <conditional name="method">
                 <param name="method" value="tl.paga"/>
@@ -566,8 +592,8 @@
             </output>
             <output name="anndata_out" file="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
-        <test>
-            <!-- test 8 -->
+        <test expect_num_outputs="2">
+            <!-- test 11 -->
             <param name="adata" value="tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" />
             <conditional name="method">
                 <param name="method" value="tl.dpt"/>
@@ -590,6 +616,26 @@
             </output>
             <output name="anndata_out" file="tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
         </test>
+        <test expect_num_outputs="2">
+            <!-- test 12 -->
+            <param name="adata" value="tl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" />
+            <conditional name="method">
+                <param name="method" value="tl.embedding_density"/>
+                <param name="basis" value="umap"/>
+                <param name="key_added" value="umap_density"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sc.tl.embedding_density"/>
+                    <has_text_matching expression="basis='umap'"/>
+                    <has_text_matching expression="key_added='umap_density'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" file="tl.embedding_density.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
     </tests>
     <help><![CDATA[
 Cluster cells into subgroups (`tl.louvain`)
@@ -602,7 +648,7 @@
 This requires to run `pp.neighbors`, first.
 
 More details on the `tl.louvain scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.louvain.html>`_
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.louvain.html>`_
 
 Cluster cells into subgroups (`tl.leiden`)
 ==========================================
@@ -612,7 +658,7 @@
 The Louvain algorithm has been proposed for single-cell analysis by Levine et al, 2015.
 
 More details on the `tl.leiden scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.leiden.html>`_
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.leiden.html>`_
 
 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `pp.pca`
 ============================================================================================================
@@ -620,7 +666,7 @@
 @CMD_pca_outputs@
 
 More details on the `pp.pca scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.pca.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.pca.html>`__
 
 Computes PCA (principal component analysis) coordinates, loadings and variance decomposition, using `tl.pca`
 ============================================================================================================
@@ -628,7 +674,7 @@
 @CMD_pca_outputs@
 
 More details on the `tl.pca scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.pca.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.pca.html>`__
 
 Diffusion Maps, using `tl.diffmap`
 ==================================
@@ -645,24 +691,24 @@
 `method=='umap'`. Differences between these options shouldn't usually be
 dramatic.
 
-The diffusion map representation of data are added to the return AnnData in the multi-dimensional 
-observations annotation (obsm). It is the right eigen basis of the transition matrix with eigenvectors 
+The diffusion map representation of data are added to the return AnnData in the multi-dimensional
+observations annotation (obsm). It is the right eigen basis of the transition matrix with eigenvectors
 as colum. It can be accessed using the inspect tool for AnnData
 
 More details on the `tl.diffmap scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.diffmap.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.diffmap.html>`__
 
 t-distributed stochastic neighborhood embedding (tSNE), using `tl.tsne`
 =======================================================================
 
 t-distributed stochastic neighborhood embedding (tSNE) (Maaten et al, 2008) has been
 proposed for visualizating single-cell data by (Amir et al, 2013). Here, by default,
-we use the implementation of *scikit-learn* (Pedregosa et al, 2011). 
+we use the implementation of *scikit-learn* (Pedregosa et al, 2011).
 
 It returns `X_tsne`, tSNE coordinates of data.
 
 More details on the `tl.tsne scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.tsne.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.tsne.html>`__
 
 Embed the neighborhood graph using UMAP, using `tl.umap`
 ========================================================
@@ -678,30 +724,30 @@
 (McInnes et al, 2018). For a few comparisons of UMAP with tSNE, see this `preprint
 <https://doi.org/10.1101/298430>`__.
 
-The UMAP coordinates of data are added to the return AnnData in the multi-dimensional 
+The UMAP coordinates of data are added to the return AnnData in the multi-dimensional
 observations annotation (obsm). This data is accessible using the inspect tool for AnnData
 
 More details on the `tl.umap scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.umap.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.umap.html>`__
 
 Force-directed graph drawing, using `tl.draw_graph`
 ===================================================
 
-Force-directed graph drawing describes a class of long-established algorithms for visualizing graphs. 
-It has been suggested for visualizing single-cell data by Islam et al, 11. 
-Many other layouts as implemented in igraph are available. Similar approaches have been used by 
+Force-directed graph drawing describes a class of long-established algorithms for visualizing graphs.
+It has been suggested for visualizing single-cell data by Islam et al, 2011.
+Many other layouts as implemented in igraph are available. Similar approaches have been used by
 Zunder et al, 2015 or Weinreb et al, 2016.
 
-This is an alternative to tSNE that often preserves the topology of the data better. 
+This is an alternative to tSNE that often preserves the topology of the data better.
 This requires to run `pp.neighbors`, first.
 
 The default layout (ForceAtlas2) uses the package fa2.
 
-The coordinates of graph layout are added to the return AnnData in the multi-dimensional 
+The coordinates of graph layout are added to the return AnnData in the multi-dimensional
 observations annotation (obsm). This data is accessible using the inspect tool for AnnData.
 
 More details on the `tl.draw_graph scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.draw_graph.html>`__
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.draw_graph.html>`__
 
 Infer progression of cells through geodesic distance along the graph (`tl.dpt`)
 ===============================================================================
@@ -714,7 +760,7 @@
 `n_branchings>1`. We recommend, however, to only use
 `tl.dpt` for computing pseudotime (`n_branchings=0`) and
 to detect branchings via `paga`. For pseudotime, you need
-to annotate your data with a root cell. 
+to annotate your data with a root cell.
 
 This requires to run `pp.neighbors`, first. In order to
 reproduce the original implementation of DPT, use `method=='gauss'` in
@@ -730,7 +776,7 @@
 The tool is similar to the R package `destiny` of Angerer et al (2016).
 
 More details on the `tl.dpt scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.dpt.html>`_
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.dpt.html>`_
 
 
 Generate cellular maps of differentiation manifolds with complex topologies (`tl.paga`)
@@ -761,7 +807,7 @@
 These datasets are stored in the unstructured annotation (uns) and can be accessed using the inspect tool for AnnData objects
 
 More details on the `tl.paga scanpy documentation
-<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.tl.paga.html>`_
+<https://scanpy.readthedocs.io/en/stable/api/scanpy.tl.paga.html>`_
     ]]></help>
     <expand macro="citations"/>
 </tool>