Mercurial > repos > iuc > scanpy_normalize
changeset 12:0ac2f7d40040 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit c21958f44b81d740191999fb6015d5ae69538ee0
line wrap: on
line diff
--- a/macros.xml Wed Sep 22 21:06:01 2021 +0000 +++ b/macros.xml Wed Jul 31 18:08:37 2024 +0000 @@ -1,12 +1,17 @@ <macros> - <token name="@version@">1.7.1</token> - <token name="@profile@">19.01</token> - <token name="@galaxy_version@"><![CDATA[@version@+galaxy0]]></token> + <token name="@TOOL_VERSION@">1.9.6</token> + <token name="@VERSION_SUFFIX@">4</token> + <token name="@profile@">21.09</token> <xml name="requirements"> <requirements> - <requirement type="package" version="@version@">scanpy</requirement> - <requirement type="package" version="2.0.17">loompy</requirement> - <requirement type="package" version="0.8.3">leidenalg</requirement> + <requirement type="package" version="@TOOL_VERSION@">scanpy</requirement> + <requirement type="package" version="3.0.6">loompy</requirement> + <requirement type="package" version="0.10.1">leidenalg</requirement> + <requirement type="package" version="0.8.1">louvain</requirement> + <requirement type="package" version="1.5.3">pandas</requirement> + <requirement type="package" version="3.7">matplotlib</requirement> + <requirement type="package" version="0.12.2">seaborn</requirement> + <requirement type="package" version="3.0.0">magic-impute</requirement> <yield /> </requirements> </xml> @@ -15,9 +20,15 @@ <xref type="bio.tools">scanpy</xref> </xrefs> </xml> + <xml name="creators"> + <creator> + <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/" /> + </creator> + </xml> <xml name="citations"> <citations> <citation type="doi">10.1186/s13059-017-1382-0</citation> + <citation type="doi">10.1093/gigascience/giaa102</citation> </citations> </xml> <xml name="version_command"> @@ -56,7 +67,7 @@ <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/> </xml> <token name="@CMD_read_inputs@"><![CDATA[ -adata = sc.read('anndata.h5ad') +adata = sc.read_h5ad('anndata.h5ad') ]]> </token> <xml name="inputs_common_advanced"> @@ -66,12 +77,12 @@ </xml> <xml name="anndata_outputs"> <data 
name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/> - <data name="hidden_output" format="txt" label="Log file" > + <data name="hidden_output" format="txt" label="Log file" > <filter>advanced_common['show_log']</filter> </data> </xml> <token name="@CMD_anndata_write_outputs@"><![CDATA[ -adata.write('anndata.h5ad') +adata.write_h5ad('anndata.h5ad') with open('anndata_info.txt','w', encoding='utf-8') as ainfo: print(adata, file=ainfo) ]]> @@ -414,6 +425,7 @@ <param name="type" type="select" label="Variables to plot (columns of the heatmaps)" > <option value="all">All variables in 'adata.var_names'</option> <option value="custom">Subset of variables in 'adata.var_names'</option> + <option value="customfile">Subset of variables as a tabular file</option> </param> <when value="all"/> <when value="custom"> @@ -421,6 +433,9 @@ <expand macro="sanitize_query" /> </param> </when> + <when value="customfile"> + <param argument="var_names" type="data" format="tabular" label="List of variables to plot" help="This should be a tsv where row = group (e.g. 
celltypes) and columns = variables."></param> + </when> </conditional> </xml> <xml name="param_num_categories"> @@ -457,15 +472,17 @@ <expand macro="param_num_categories"/> </xml> <token name="@CMD_params_inputs@"><![CDATA[ - #if $method.var_names.type == 'all' - var_names=adata.var_names, -#else + #if $method.var_names.type == 'custom' #set $var_names = ([x.strip() for x in str($method.var_names.var_names).split(',')]) var_names=$var_names, -#end if -#if str($method.groupby) != '' + #else if $method.var_names.type == 'customfile' + var_names={key: [v for v in list(value.values()) if pd.notna(v)] for key, value in pd.read_csv('$var_names', sep='\t', index_col=0).to_dict(orient='index').items()}, + #else + var_names=adata.var_names, + #end if + #if $method.groupby groupby='$method.groupby', -#end if + #end if num_categories=$method.num_categories, ]]></token> <xml name="params_plots"> @@ -491,15 +508,15 @@ var_group_positions=$var_group_positions, var_group_labels=$var_group_labels, #end if -#if $method.var_group_rotation - var_group_rotation=$method.var_group_rotation, -#end if -#if $method.figsize.test == 'yes' - figsize=($method.figsize.width, $method.figsize.height), -#end if -#if $method.layer != '' - layer='$method.layer', -#end if + #if str($method.var_group_rotation) != '' + var_group_rotation=$method.var_group_rotation, + #end if + #if $method.figsize.test == 'yes' + figsize=($method.figsize.width, $method.figsize.height), + #end if + #if $method.layer + layer='$method.layer', + #end if ]]></token> <xml name="matplotlib_color"> <option value="AliceBlue">AliceBlue</option> @@ -652,9 +669,8 @@ <option value="YellowGreen">YellowGreen</option> </xml> <xml name="param_matplotlib_pyplot_edgecolors"> - <param argument="edgecolors" type="select" label="Edge color of the marker" help=""> + <param argument="edgecolors" type="select" optional="true" label="Edge color of the marker" help=""> <option value="face">The edge color will always be the same as the face 
color</option> - <option value="none">No patch boundary will be drawn</option> <expand macro="matplotlib_color"/> </param> </xml> @@ -692,17 +708,19 @@ </section> </xml> <token name="@CMD_params_matplotlib_pyplot_scatter@"><![CDATA[ - #if $method.matplotlib_pyplot_scatter.vmin + #if str($method.matplotlib_pyplot_scatter.vmin) != '' vmin=$method.matplotlib_pyplot_scatter.vmin, #end if - #if $method.matplotlib_pyplot_scatter.vmax + #if str($method.matplotlib_pyplot_scatter.vmax) != '' vmax=$method.matplotlib_pyplot_scatter.vmax, #end if - #if $method.matplotlib_pyplot_scatter.alpha + #if str($method.matplotlib_pyplot_scatter.alpha) != '' alpha=$method.matplotlib_pyplot_scatter.alpha, #end if - linewidths=$method.matplotlib_pyplot_scatter.linewidths, - edgecolors='$method.matplotlib_pyplot_scatter.edgecolors' + lw=$method.matplotlib_pyplot_scatter.linewidths, + #if $method.matplotlib_pyplot_scatter.edgecolors + ec='$method.matplotlib_pyplot_scatter.edgecolors' + #end if ]]></token> <xml name="conditional_stripplot"> <conditional name="stripplot"> @@ -742,13 +760,7 @@ </param> </xml> <token name="@CMD_params_violin_plots@"><![CDATA[ - stripplot=$method.violin_plot.stripplot.stripplot, -#if $method.violin_plot.stripplot.stripplot == "True" - jitter=$method.violin_plot.stripplot.jitter.jitter, - #if $method.violin_plot.stripplot.jitter.jitter == "True" - size=$method.violin_plot.stripplot.jitter.size, - #end if -#end if + @CMD_conditional_stripplot@ multi_panel=$method.violin_plot.multi_panel.multi_panel, #if $method.multi_panel.violin_plot.multi_panel == "True" and str($method.violin_plot.multi_panel.width) != '' and str($method.violin_plot.multi_panel.height) != '' figsize=($method.violin_plot.multi_panel.width, $method.violin_plot.multi_panel.height) @@ -766,7 +778,7 @@ <option value="h">horizontal</option> </param> <param argument="linewidth" type="float" value="0" label="Width of the gray lines that frame the plot elements" help=""/> - <param argument="color" 
type="select" label="Color for all of the elements" help=""> + <param argument="color" type="select" optional="true" label="Color for all of the elements" help=""> <expand macro="matplotlib_color"/> </param> <param argument="saturation" type="float" value="0.75" min="0" max="1" label="Proportion of the original saturation to draw colors at" help=""/> @@ -778,7 +790,9 @@ orient='$method.seaborn_violinplot.orient', #end if linewidth=$method.seaborn_violinplot.linewidth, + #if $method.seaborn_violinplot.color color='$method.seaborn_violinplot.color', + #end if saturation=$method.seaborn_violinplot.saturation ]]></token> <xml name="param_color"> @@ -787,7 +801,7 @@ </param> </xml> <token name="@CMD_param_color@"><![CDATA[ -#if str($method.color) != '' +#if $method.color #set $color = ([x.strip() for x in str($method.color).split(',')]) color=$color, #end if @@ -798,7 +812,7 @@ </param> </xml> <token name="@CMD_params_groups@"><![CDATA[ -#if str($method.groups) != '' +#if $method.groups #set $groups=([x.strip() for x in str($method.groups).split(',')]) groups=$groups, #end if @@ -868,14 +882,12 @@ </param> </xml> <xml name="param_palette"> - <param argument="palette" type="select" label="Colors to use for plotting categorical annotation groups" help=""> - <option value="default">Default</option> + <param argument="palette" type="select" optional="true" label="Colors to use for plotting categorical annotation groups" help=""> <expand macro="matplotlib_pyplot_colormap"/> </param> </xml> <xml name="param_color_map"> - <param argument="color_map" type="select" label="Color map to use for continous variables" help=""> - <option value="None">Default</option> + <param argument="color_map" type="select" optional="true" label="Color map to use for continous variables" help=""> <expand macro="matplotlib_pyplot_colormap"/> </param> </xml> @@ -931,7 +943,7 @@ </param> <when value="True"> <param argument="edges_width" type="float" min="0" value="0.1" label="Width of edges"/> - 
<param argument="edges_color" type="select" label="Color of edges"> + <param argument="edges_color" type="select" optional="true" label="Color of edges"> <expand macro="matplotlib_color"/> </param> </when> @@ -942,7 +954,9 @@ #if str($method.edges.edges) == 'True' edges=True, edges_width=$method.edges.edges_width, + #if $method.edges.edges_color edges_color='$method.edges.edges_color', + #end if #else edges=False, #end if @@ -952,7 +966,7 @@ <param argument="arrows" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Show arrows?" help="It requires to run 'tl.rna_velocity' before."/> </xml> <xml name="param_cmap"> - <param argument="cmap" type="select" label="Colors to use for plotting categorical annotation groups" help=""> + <param argument="cmap" type="select" optional="true" label="Colors to use for plotting categorical annotation groups" help=""> <expand macro="matplotlib_pyplot_colormap"/> </param> </xml> @@ -979,10 +993,10 @@ @CMD_param_legend_fontsize@ legend_fontweight='$method.plot.legend_fontweight', @CMD_param_size@ - #if str($method.plot.color_map) != 'None' + #if $method.plot.color_map color_map='$method.plot.color_map', #end if - #if str($method.plot.palette) != '' + #if $method.plot.palette palette='$method.plot.palette', #end if frameon=$method.plot.frameon, @@ -1052,18 +1066,18 @@ </xml> <token name="@CMD_params_pl_paga@"><![CDATA[ threshold=$method.threshold, -#if str($method.groups) != '' +#if $method.groups #set $groups=([x.strip() for x in str($method.groups).split(',')]) groups=$groups, #end if -#if str($method.color) != '' +#if $method.color #set $color=([x.strip() for x in str($method.color).split(',')]) color=$color, #end if #if $method.pos pos=np.fromfile($method.pos, dtype=dt), #end if -#if str($method.labels) != '' +#if $method.labels #set $labels=([x.strip() for x in str($method.labels).split(',')]) labels=$labels, #end if @@ -1072,7 +1086,7 @@ init_pos=np.fromfile($method.init_pos, dtype=dt), #end if 
random_state=$method.random_state, -#if str($method.root) != '' +#if $method.root #set $root=([int(x.strip()) for x in str($method.root).split(',')]) root=$root, #end if @@ -1088,15 +1102,17 @@ node_size_scale=$method.node_size_scale, node_size_power=$method.node_size_power, edge_width_scale=$method.edge_width_scale, -#if $method.min_edge_width +#if str($method.min_edge_width) != '' min_edge_width=$method.min_edge_width, #end if -#if $method.max_edge_width +#if str($method.max_edge_width) != '' max_edge_width=$method.max_edge_width, #end if arrowsize=$method.arrowsize, normalize_to_color=$method.normalize_to_color, + #if $method.cmap cmap='$method.cmap', + #end if #if $method.title title='$method.title', #end if @@ -1112,10 +1128,10 @@ </param> </xml> <xml name="param_n_genes"> - <param argument="n_genes" type="integer" min="0" value="10" label="Number of genes to show" help=""/> + <param argument="n_genes" type="integer" min="0" value="10" label="Number of genes to show" help="It is only used if you are not specifying certain variable names"/> </xml> <xml name="pl_dotplot"> - <param argument="color_map" type="select" label="Color palette"> + <param argument="color_map" type="select" optional="true" label="Color palette"> <expand macro="matplotlib_pyplot_colormap"/> </param> <param argument="dot_max" type="float" value="" min="0" max="1" optional="true" label="Maximum dot size" help="If none, the maximum dot size is set to the maximum fraction value found (e.g. 0.6). If given, the value should be a number between 0 and 1. 
All fractions larger than dot_max are clipped to this value."/> @@ -1123,7 +1139,9 @@ <expand macro="section_matplotlib_pyplot_scatter"/> </xml> <token name="@CMD_pl_dotplot@"><![CDATA[ + #if $method.color_map color_map='$method.color_map', + #end if #if str($method.dot_max) != '' dot_max=$method.dot_max, #end if @@ -1174,17 +1192,19 @@ <token name="@CMD_pl_heatmap@"><![CDATA[ swap_axes=$method.swap_axes, show_gene_labels=$method.show_gene_labels, + #if $method.matplotlib_pyplot_imshow.cmap cmap='$method.matplotlib_pyplot_imshow.cmap', + #end if #if str($method.matplotlib_pyplot_imshow.interpolation) != 'None' interpolation='$method.matplotlib_pyplot_imshow.interpolation', #end if - #if $method.matplotlib_pyplot_imshow.alpha + #if str($method.matplotlib_pyplot_imshow.alpha) != '' alpha=$method.matplotlib_pyplot_imshow.alpha, #end if - #if $method.matplotlib_pyplot_imshow.vmin + #if str($method.matplotlib_pyplot_imshow.vmin) != '' vmin=$method.matplotlib_pyplot_imshow.vmin, #end if - #if $method.matplotlib_pyplot_imshow.vmax + #if str($method.matplotlib_pyplot_imshow.vmax) != '' vmax=$method.matplotlib_pyplot_imshow.vmax, #end if origin='$method.matplotlib_pyplot_imshow.origin' @@ -1197,10 +1217,8 @@ </xml> <token name="@CMD_pl_rank_genes_groups_ext@"><![CDATA[ @CMD_params_groups@ - #if str($method.n_genes) != '' n_genes=$method.n_genes, - #end if - #if str($method.key) != '' + #if $method.key key='$method.key', #end if ]]> @@ -1208,7 +1226,7 @@ <xml name="pl_matrixplot"> <expand macro="param_swap_axes"/> <section name="matplotlib_pyplot_pcolor" title="Parameters for matplotlib.pyplot.pcolor"> - <param argument="cmap" type="select" label="Color palette"> + <param argument="cmap" type="select" optional="true" label="Color palette"> <expand macro="seaborn_color_palette_options"/> </param> <param argument="vmin" type="float" value="" optional="true" label="Minimum value to anchor the colormap" help=""/> @@ -1220,15 +1238,19 @@ </xml> <token 
name="@CMD_pl_matrixplot@"><![CDATA[ swap_axes=$method.swap_axes, + #if $method.matplotlib_pyplot_pcolor.cmap cmap='$method.matplotlib_pyplot_pcolor.cmap', - #if $method.matplotlib_pyplot_pcolor.vmin + #end if + #if str($method.matplotlib_pyplot_pcolor.vmin) != '' vmin=$method.matplotlib_pyplot_pcolor.vmin, #end if - #if $method.matplotlib_pyplot_pcolor.vmax + #if str($method.matplotlib_pyplot_pcolor.vmax) != '' vmax=$method.matplotlib_pyplot_pcolor.vmax, #end if - edgecolors='$method.matplotlib_pyplot_pcolor.edgecolors', - #if $method.matplotlib_pyplot_pcolor.alpha + #if $method.matplotlib_pyplot_pcolor.edgecolors + ec='$method.matplotlib_pyplot_pcolor.edgecolors', + #end if + #if str($method.matplotlib_pyplot_pcolor.alpha) != '' alpha=$method.matplotlib_pyplot_pcolor.alpha, #end if snap=$method.matplotlib_pyplot_pcolor.snap @@ -1240,7 +1262,7 @@ <expand macro="conditional_stripplot"/> <expand macro="param_scale"/> </section> - <param argument="row_palette" type="select" label="Colors to use in each of the stacked violin plots"> + <param argument="row_palette" type="select" optional="true" label="Colors to use in each of the stacked violin plots"> <option value="muted">muted</option> <expand macro="seaborn_color_palette_options"/> </param> @@ -1255,7 +1277,9 @@ swap_axes=$method.swap_axes, @CMD_conditional_stripplot@ scale='$method.violin_plot.scale', + #if $method.row_palette row_palette='$method.row_palette', + #end if #if str($method.standard_scale) != 'None' standard_scale='$method.standard_scale', #end if
--- a/normalize.xml Wed Sep 22 21:06:01 2021 +0000 +++ b/normalize.xml Wed Jul 31 18:08:37 2024 +0000 @@ -1,9 +1,9 @@ -<tool id="scanpy_normalize" name="Normalize" version="@galaxy_version@" profile="@profile@"> - <description>with scanpy</description> - <expand macro="bio_tools"/> +<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> + <description>and impute with scanpy</description> <macros> <import>macros.xml</import> </macros> + <expand macro="bio_tools"/> <expand macro="requirements"/> <expand macro="version_command"/> <command detect_errors="exit_code"><![CDATA[ @@ -17,17 +17,17 @@ #if $method.method == "pp.normalize_total" sc.pp.normalize_total( adata, - #if str($method.target_sum)!= '' + #if str($method.target_sum) != '' target_sum=$method.target_sum, #end if exclude_highly_expressed=$method.exclude_highly_expressed.exclude_highly_expressed, #if $method.exclude_highly_expressed.exclude_highly_expressed == "True" max_fraction=$method.exclude_highly_expressed.max_fraction, #end if - #if str($method.key_added) != '' + #if $method.key_added key_added='$method.key_added', #end if - #if str($method.layers) != '' + #if $method.layers #if str($method.layers) != 'all' layers[str(x.strip()) for x in str($method.layers).split(',')], #else @@ -65,6 +65,29 @@ plot=False, copy=False) +#else if $method.method == "external.pp.magic" +sc.external.pp.magic( + adata=adata, + name_list='$method.name_list', + knn=$method.knn, + #if str($method.decay) != '' + decay=$method.decay, + #end if + #if str($method.knn_max) != '' + knn_max=$method.knn_max, + #end if + #if $method.t == -1 + t='auto', + #else + t=$method.t, + #end if + #if str($method.n_pca) != '' + n_pca=$method.n_pca, + #end if + solver='$method.solver', + knn_dist='$method.knn_dist', + random_state=$method.random_state, + copy=False) #end if @CMD_anndata_write_outputs@ @@ -79,6 +102,7 @@ <option value="pp.recipe_zheng17">Normalization and filtering as of 
Zheng et al. (2017), using 'pp.recipe_zheng17'</option> <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option> <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option> + <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option> </param> <when value="pp.normalize_total"> <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/> @@ -119,6 +143,29 @@ <when value="pp.recipe_seurat"> <expand macro="param_log"/> </when> + <when value="external.pp.magic"> + <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory"> + <option value="all_genes">All genes</option> + <option value="pca_only">PCA only</option> + </param> + <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/> + <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" + help="If not set, alpha decaying kernel is not used" /> + <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection" + help="If not set, will be set to 3 * knn" /> + <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion" + help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." 
/> + <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods" + help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." /> + <param name="solver" type="select" label="Which solver to use" help="Selecting all genes may require a large amount of memory"> + <option value="exact">"exact", the implementation described in van Dijk et al. (2018) </option> + <option value="approximate">"approximate", is faster that performs imputation in the PCA space and then projects back to the gene space</option> + </param> + <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html"> + <expand macro="distance_metric_options"/> + </param> + <expand macro="param_random_state"/> + </when> </conditional> <expand macro="inputs_common_advanced"/> </inputs> @@ -126,8 +173,8 @@ <expand macro="anndata_outputs"/> </outputs> <tests> - <test> - <!-- test 0 --> + <test expect_num_outputs="2"> + <!-- test 1 --> <param name="adata" value="krumsiek11.h5ad" /> <conditional name="method"> <param name="method" value="pp.normalize_total"/> @@ -151,8 +198,8 @@ </output> <output name="anndata_out" file="pp.normalize_total.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test> - <!-- test 1 --> + <test expect_num_outputs="2"> + <!-- test 2 --> <param name="adata" value="random-randint.h5ad"/> <conditional name="method"> <param name="method" value="pp.recipe_zheng17"/> @@ -171,8 +218,8 @@ </output> <output name="anndata_out" file="pp.recipe_zheng17.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.15"/> </test> - <test> - <!-- test 2 --> + <test expect_num_outputs="2"> + <!-- test 3 --> <param 
name="adata" value="paul15_subsample.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_weinreb17"/> @@ -199,8 +246,8 @@ </output> <output name="anndata_out" file="pp.recipe_weinreb17.paul15_subsample.updated.h5ad" ftype="h5ad" compare="sim_size"/> </test> - <test> - <!-- test 3 --> + <test expect_num_outputs="2"> + <!-- test 4 --> <param name="adata" value="pp.recipe_zheng17.random-randint.h5ad" /> <conditional name="method"> <param name="method" value="pp.recipe_seurat"/> @@ -217,6 +264,53 @@ </output> <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/> </test> + <test expect_num_outputs="2"> + <!-- test 5 --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="external.pp.magic"/> + <param name="name_list" value="all_genes"/> + <param name="t" value="-1"/> + <param name="n_pca" value="5"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="external.pp.magic"/> + <has_text_matching expression="name_list='all_genes'"/> + <has_text_matching expression="t='auto'"/> + <has_text_matching expression="n_pca=5"/> + </assert_contents> + </output> + <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> + </test> + <test expect_num_outputs="2"> + <!-- test 6 --> + <param name="adata" value="krumsiek11.h5ad" /> + <conditional name="method"> + <param name="method" value="external.pp.magic"/> + <param name="name_list" value="pca_only"/> + <param name="t" value="3"/> + <param name="n_pca" value="5"/> + </conditional> + <section name="advanced_common"> + <param name="show_log" value="true" /> + </section> + <output name="hidden_output"> + <assert_contents> + <has_text_matching expression="external.pp.magic"/> + 
<has_text_matching expression="name_list='pca_only'"/> + <has_text_matching expression="t=3"/> + <has_text_matching expression="n_pca=5"/> + </assert_contents> + </output> + <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/> + <assert_stdout> + <has_text text="X_magic"/> + </assert_stdout> + </test> </tests> <help><![CDATA[ Normalize total counts per cell (`pp.normalize_per_cell`) @@ -228,7 +322,7 @@ Similar functions are used, for example, by Seurat, Cell Ranger or SPRING. More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.normalize_per_cell.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.normalize_per_cell.html>`__ Normalization and filtering as of Zheng et al. (2017), the Cell Ranger R Kit of 10x Genomics (`pp.recipe_zheng17`) @@ -247,7 +341,7 @@ - scale to unit variance and shift to zero mean More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_zheng17.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_zheng17.html>`__ Normalization and filtering as of Weinreb et al (2017) (`pp.recipe_weinreb17`) @@ -256,7 +350,7 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_weinreb17.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_weinreb17.html>`__ Normalization and filtering as of Seurat et al (2015) (`pp.recipe_seurat`) @@ -267,7 +361,21 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`. 
More details on the `scanpy documentation -<https://icb-scanpy.readthedocs-hosted.com/en/@version@/api/scanpy.pp.recipe_seurat.html>`__ +<https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.recipe_seurat.html>`__ + + +Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`) +============================================================================================================ + +MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold. + +The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). + +- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability. +- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements. + +More details on the `scanpy documentation +<https://scanpy.readthedocs.io/en/stable/api/scanpy.external.pp.magic.html>`__ ]]></help> <expand macro="citations"/>
Binary file test-data/pl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.png has changed
Binary file test-data/pl.dpt_groups_pseudotime.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.png has changed
Binary file test-data/pl.dpt_timeseries.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.png has changed
Binary file test-data/pl.highest_expr_genes.filter_genes_dispersion.krumsiek11-seurat.png has changed
Binary file test-data/pl.rank_genes_groups_stacked_violin.rank_genes_groups.krumsiek11.png has changed
Binary file test-data/pl.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.png has changed
Binary file test-data/pp.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.dpt.diffmap.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.draw_graph.pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.embedding_density.umap.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.leiden.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.louvain.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
Binary file test-data/tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad has changed
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv Wed Jul 31 18:08:37 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 +CD14+ Monocyte PILRA PSAP CD68 TMEM176B FTL NPC2 LST1 FCGR3A FCER1G CEBPB FCN1 SERPINA1 OAZ1 CFD FTH1 HCK AIF1 SAT1 CTSS S100A11 MS4A7 TYROBP COTL1 STXBP2 RP11-290F20.3 S100A4 IFITM2 SPI1 DUSP1 SESN2 IFITM3 MPP1 GALE CORO1B RP11-390E23.6 VIMP RSBN1L-AS1 CHD4 CFP GSTP1 PFN1 FCGRT ADTRP ARHGDIB AMICA1 HLA-DRB5 CST3 GRN HLA-DPA1 SSR3 +CD19+ B TNFRSF13B CD79B SMARCB1 PNOC CCDC50 AL928768.3 BANK1 MS4A1 CD79A ISG20 IGLL5 TNFRSF17 KIAA0125 TPD52 PEBP1 FKBP11 CCDC132 SUB1 POU2AF1 MZB1 PTPRCAP UBE2J1 BLK SPIB DERL3 FAM63B MPHOSPH9 IGJ FCRLA XBP1 NCF1 SSR3 CD52 TSHZ2 PDLIM1 VIMP SSR4 S1PR4 SELL HMGA1 NUCB2 JUN CD27 ARHGDIB GYPC CALR ADTRP BTG1 EXOG RARRES3 +CD34+ PRSS57 C19orf77 SPINK2 RP11-620J15.3 SNHG7 CYTL1 EGFL7 NGFRAP1 SOX4 NFE2 EGR1 RP3-467N11.1 H1FX CDK6 SERPINB1 SPINT2 HMGA1 IL1B NUCB2 RPLP0 IGFBP7 RPLP1 ATXN7L3B RPS3 C1orf228 KIAA0125 RPL3 SYPL1 CD63 LDHB SEPT1 JUN FAM101B PRKCQ-AS1 MATK PEBP1 SELL ITM2A SSR3 SPON2 XBP1 UBE2J1 VIMP GYPC STK17A STMN1 VIM MZB1 HOPX CD99 +CD4+/CD25 T Reg IL32 SPOCK2 ACTG1 CD2 CD3D GPR171 ARHGDIB ACOX1 MAL SIT1 GIMAP4 AES CD52 SEPT1 TMSB10 LAT STMN1 LINC00402 CD27 TSHZ2 S1PR4 CD3E PFN1 CD99 AQP3 PTPRCAP CD3G LY9 LCK CD247 S100A4 CCR7 TTC39C CORO1B MPHOSPH9 FYB RPSA FLT3LG B2M GIMAP7 PRKCQ-AS1 SELL BTG1 CCDC132 GYPC DENND2D LDHB IL7R ITM2A RPLP0 +CD4+/CD45RA+/CD25- Naive T EAF2 GNG7 SSR4 CALR DERL3 MANF IGJ XBP1 ATXN7L3B SSR3 UBE2J1 CD79A MZB1 RP3-467N11.1 TNFRSF17 NCF1 CDK6 SUB1 POU2AF1 AL928768.3 FKBP11 VIMP GYPC JUN CD27 PEBP1 SMARCB1 FLT3LG RPLP1 RPLP0 CCDC50 ISG20 IGLL5 HCST GSTP1 GPX1 CD52 VIM PTPRCAP FCGRT CD74 B2M RPL3 CYTL1 SPINK2 PRSS57 C19orf77 RP11-620J15.3 FAM101B CCDC132 +CD4+/CD45RO+ Memory RNF138 NOSIP 
IFITM1 LCK RARRES3 ALOX5AP FAM63B RAB3IP GZMK CD3G SEPT1 LDHB SELL CD3D EXOG RPSA CD247 AES CD52 TMSB10 NUCB2 DENND2D RPL3 RPLP1 ACTG1 FYB GIMAP7 CORO1B LY9 CD7 PFN1 RPS3 GYPC CD2 ARHGDIB IL32 RPLP0 CD99 CD3E GIMAP4 HCST B2M LAT ISG20 ITM2A FKBP11 SERPINB1 STK17A CCR7 PTPRCAP +CD56+ NK CST7 SPON2 HOPX GNLY NKG7 CTSW KLRC2 CD7 MATK PCIF1 CLIC3 FGFBP2 SYPL1 GZMB C9orf142 PRF1 CD247 HCST GZMA GZMH STMN1 ALOX5AP CD63 CD99 IGFBP7 GZMM CCL5 B2M DENND2D GIMAP7 RARRES3 SIT1 IFITM1 PFN1 EXOG XBP1 IFITM2 GIMAP4 VIMP STK17A LCK GZMK SEPT1 SSR3 CD8A CD3G SPOCK2 RPS3 LDHB IL32 +CD8+ Cytotoxic T FAM101B ADTRP GZMK HCST LAT EGR1 CD8B CCL5 RPL3 LINC00402 FGFBP2 GZMM RPS3 CD3E GYPC DENND2D C9orf142 GZMA SEPT1 JUN FYB CD8A SELL ALOX5AP CD3G STK17A AQP3 C1orf228 CD3D HOPX NKG7 CD2 NGFRAP1 RPLP1 RPSA CCR7 IL7R SPON2 PRF1 RARRES3 PRKCQ-AS1 FKBP11 MANF CTSW GNLY CD27 LDHB MAL LTB RPLP0 +CD8+/CD45RA+ Naive Cytotoxic RP11-291B21.2 CD8A CD8B RSBN1L-AS1 GIMAP5 GZMM GALE CCR7 STK17A RAB3IP GZMH GIMAP7 CD3E C1orf228 LCK CCL5 PEBP1 CD27 GYPC LDHB RNF34 CD99 CD3G PFN1 IL7R CD2 C9orf142 TMSB10 NGFRAP1 S1PR4 ITM2A CD7 RPS3 IL32 FYB IFITM1 CD52 LAT GIMAP4 MAL STMN1 NOSIP RARRES3 SPOCK2 ACTG1 PRF1 CD3D RPLP1 SELL GZMA +Dendritic HLA-DQB1 CST3 HLA-DRB1 HLA-DQA2 HLA-DQA1 LYZ HLA-DPB1 HLA-DPA1 HLA-DMA HLA-DRA VIM CD74 ALDH2 FCER1A GPX1 HLA-DRB5 LGALS2 MNDA FCGRT GRN HLA-DMB FOS CPVL CLEC10A AMICA1 CFP LY86 GSTP1 RP11-473M20.7 IL1B GSN SPINT2 CCDC163P IGFBP7 EXOG DUSP1 CD63 COTL1 FTH1 SPI1 TYROBP SPIB S100A11 OAZ1 CTSS CCDC50 AIF1 SERPINB1 TMSB10 PCIF1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv Wed Jul 31 18:08:37 2024 +0000 @@ -0,0 +1,11 @@ + 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 +CD14+ Monocyte PILRA PSAP CD68 TMEM176B FTL NPC2 LST1 FCGR3A FCER1G CEBPB FCN1 SERPINA1 OAZ1 CFD FTH1 HCK AIF1 SAT1 CTSS S100A11 TYROBP COTL1 S100A4 SPI1 DUSP1 +CD19+ B TNFRSF13B CD79B SMARCB1 PNOC CCDC50 AL928768.3 BANK1 MS4A1 CD79A ISG20 IGLL5 TNFRSF17 KIAA0125 TPD52 PEBP1 FKBP11 SUB1 POU2AF1 MZB1 PTPRCAP DERL3 XBP1 CD52 +CD34+ PRSS57 C19orf77 SPINK2 RP11-620J15.3 SNHG7 CYTL1 EGFL7 NGFRAP1 SOX4 NFE2 EGR1 RP3-467N11.1 H1FX CDK6 SERPINB1 SPINT2 HMGA1 IL1B NUCB2 RPLP0 IGFBP7 RPLP1 ATXN7L3B RPS3 KIAA0125 RPL3 SYPL1 LDHB SEPT1 FAM101B +CD4+/CD25 T Reg IL32 SPOCK2 ACTG1 CD2 CD3D ARHGDIB GIMAP4 AES CD52 SEPT1 TMSB10 LAT STMN1 CD27 CD3E PFN1 CD99 PTPRCAP CD3G LCK CD247 +CD4+/CD45RA+/CD25- Naive T +CD4+/CD45RO+ Memory RNF138 NOSIP IFITM1 LCK RARRES3 ALOX5AP RAB3IP GZMK CD3G SEPT1 LDHB SELL CD3D EXOG RPSA CD247 AES CD52 TMSB10 RPL3 +CD56+ NK CST7 SPON2 HOPX GNLY NKG7 CTSW KLRC2 CD7 MATK PCIF1 CLIC3 FGFBP2 SYPL1 GZMB C9orf142 PRF1 CD247 HCST GZMA GZMH STMN1 ALOX5AP CD63 CD99 IGFBP7 GZMM CCL5 B2M DENND2D GIMAP7 RARRES3 SIT1 PFN1 XBP1 +CD8+ Cytotoxic T FAM101B ADTRP GZMK HCST LAT EGR1 CD8B CCL5 RPL3 LINC00402 FGFBP2 GZMM RPS3 CD3E GYPC C9orf142 SEPT1 SELL CD3G CD3D HOPX NGFRAP1 +CD8+/CD45RA+ Naive Cytotoxic RP11-291B21.2 CD8A CD8B RSBN1L-AS1 GIMAP5 GZMM GALE CCR7 STK17A RAB3IP GIMAP7 CD3E LCK PEBP1 CD27 LDHB CD99 CD3G C9orf142 NGFRAP1 IL32 +Dendritic HLA-DQB1 CST3 HLA-DRB1 HLA-DQA2 HLA-DQA1 HLA-DPB1 HLA-DPA1 HLA-DRA CD74