Mercurial > repos > ebi-gxa > seurat_find_clusters
changeset 2:ea5b47974d4f draft default tip
planemo upload commit 34c30124158749b9eef51d5f323b608a503e7940
author | ebi-gxa |
---|---|
date | Sun, 01 Oct 2023 09:06:23 +0000 |
parents | bf0e50f4f010 |
children | |
files | README extra/macro_mapper_seurat.yaml get_test_data.sh seurat_find_clusters.xml seurat_macros.xml |
diffstat | 5 files changed, 536 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
--- a/README Wed Mar 13 12:51:22 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -Seurat tools
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/extra/macro_mapper_seurat.yaml Sun Oct 01 09:06:23 2023 +0000 @@ -0,0 +1,90 @@ +--- +- option_group: + - input-object-file + - input-format + pre_command_macros: + - INPUT_OBJ_PREAMBLE + post_command_macros: + - INPUT_OBJECT + input_declaration_macros: + - input_object_params +- option_group: + - output-object-file + - output-format + post_command_macros: + - OUTPUT_OBJECT + input_declaration_macros: + - output_object_params + output_declaration_macros: + - output_files +- option_group: + - input-object-files + - input-format + pre_command_macros: + - INPUT_OBJS_PREAMBLE + post_command_macros: + - INPUT_OBJECTS + input_declaration_macros: + - input_object_params: + multiple: true +- option_group: + - reference-object-files + - reference-format + pre_command_macros: + - REFERENCE_OBJS_PREAMBLE + post_command_macros: + - REFERENCE_OBJECTS + input_declaration_macros: + - input_object_params: + varname: reference + multiple: true + optional: true +- option_group: + - reference-object-file + - reference-format + pre_command_macros: + - REFERENCE_OBJ_PREAMBLE + post_command_macros: + - REFERENCE_OBJECT + input_declaration_macros: + - input_object_params: + varname: reference +- option_group: + - anchors-object-file + - anchors-format + pre_command_macros: + - ANCHORS_OBJ_PREAMBLE + post_command_macros: + - ANCHORS_OBJECT + input_declaration_macros: + - input_object_params: + varname: anchors +- option_group: + - query-object-file + - query-format + pre_command_macros: + - QUERY_OBJ_PREAMBLE + post_command_macros: + - QUERY_OBJECT + input_declaration_macros: + - input_object_params: + varname: query +- option_group: + - plot-out + post_command_macros: + - OUTPUT_PLOT + output_declaration_macros: + - plot_output_files_format: + format: png + - plot_output_files_format: + format: pdf + - plot_output_files_format: + format: eps + - plot_output_files_format: + format: jpg + - plot_output_files_format: + format: ps + - 
plot_output_files_format: + format: tiff + - plot_output_files_format: + format: svg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_test_data.sh Sun Oct 01 09:06:23 2023 +0000 @@ -0,0 +1,77 @@ +#!/usr/bin/env bash + +BASENAME_FILE='E-MTAB-6077-3k_features_90_cells' + +MTX_LINK='https://drive.google.com/uc?export=download&id=1-1ejn7scP80xsbrG0FtWzsozjg0hhc23' +RDS_LINK='https://drive.google.com/uc?export=download&id=1KW_GX6xznSUpWRWUykpNaSbAhyClf7_n' +NORM_LINK='https://drive.google.com/uc?export=download&id=1mvo3ENkBvEAOyWG6ejApzQTPDLX5yBKU' +FVG_LINK='https://drive.google.com/uc?export=download&id=13Fhruuj-vEEo1WM138ahtAYqfHc7LsaZ' +SCALED_LINK='https://drive.google.com/uc?export=download&id=18TK8us235LWNajarWDBAtASUXMYAxvw0' +PCA_LINK='https://drive.google.com/uc?export=download&id=1gf3BTB4dygDsom1TzjsBfgZnZepcoG5c' +NEIGHBOURS_LINK='https://drive.google.com/uc?export=download&id=1N2lHoKRBZ7pmAYGfghLWB9KUrLA5WoNX' +CLUSTERS_LINK='https://drive.google.com/uc?export=download&id=1HWxZWHbNUNo4z__9PhhL_CJOLzec_ETa' +TSNE_LINK='https://drive.google.com/uc?export=download&id=1qsvMr_GkCSp1dyTJt1BZ6cElJwFFX2zO' +MARKERS_LINK='https://drive.google.com/uc?export=download&id=18OmWNc7mF-4pzH6DQkPp1eKunN4BfvxD' + +LOOM_LINK='https://drive.google.com/uc?export=download&id=1qNk5cg8hJG3Nv1ljTKmUEnxTOf11EEZX' +H5AD_LINK='https://drive.google.com/uc?export=download&id=1YpE0H_t_dkh17P-WBhPijKvRiGP0BlBz' + +H5AD_SC182_LINK='https://drive.google.com/uc?export=download&id=16PUJ2KAkXT8F1UkfqU-9LWoOJUkUG1rp' +SCE_LINK='https://drive.google.com/uc?export=download&id=1UKdyf3M01uAt7oBg93JfmRvNVB_jlUKe' + +# Seurat v4 exclusives +IFNB_BASE_FILE='ifnb_' + +IFNB_CTRL_INT_LINK='https://drive.google.com/uc?export=download&id=15E_MLz-UclJYInNaA7YKLhLo5W-qlykL' +IFNB_STIM_INT_LINK='https://drive.google.com/uc?export=download&id=14iKgCJGPk16dEmpJJF-Gp_lBDcOdo-54' + +## Classify and UMAP mapping +CLASSIFY_QUERY_LINK='https://drive.google.com/uc?export=download&id=1RFsHa_1EFD_n-19JH_cHGqxwO66QdmXN' 
+CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK='https://drive.google.com/uc?export=download&id=1Xtv4K_CxIU1cJ8RjJ7NTvzLQkLvc8a3i' +# UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/k4MdM07y9DAnurp/download' +UMAP_RESULT_OBJECT_LINK='https://oc.ebi.ac.uk/s/D1z4z2ef1e3dyc3/download' + + +function get_data { + local link=$1 + local fname=$2 + + if [ ! -f $fname ]; then + echo "$fname not available locally, downloading.." + wget -O $fname --retry-connrefused --waitretry=1 --read-timeout=20 --timeout=15 -t 3 $link + fi +} + +# get matrix data +mkdir -p test-data +pushd test-data +get_data $MTX_LINK mtx.zip +unzip mtx.zip +rm -f mtx.zip + +get_data $RDS_LINK $BASENAME_FILE".rds" +get_data $NORM_LINK $BASENAME_FILE"-normalised.rds" +get_data $FVG_LINK $BASENAME_FILE"-fvg.rds" +get_data $SCALED_LINK $BASENAME_FILE"-scaled.rds" +get_data $PCA_LINK $BASENAME_FILE"-pca.rds" +get_data $NEIGHBOURS_LINK $BASENAME_FILE"-neighbours.rds" +get_data $CLUSTERS_LINK $BASENAME_FILE"-clusters.rds" +get_data $TSNE_LINK $BASENAME_FILE"-tsne.rds" +get_data $MARKERS_LINK $BASENAME_FILE"-markers.csv.zip" + +unzip $BASENAME_FILE"-markers.csv.zip" +rm -f $BASENAME_FILE"-markers.csv.zip" + +get_data $LOOM_LINK $BASENAME_FILE"_loom.h5" +get_data $SCE_LINK $BASENAME_FILE"_sce.rds" +get_data $H5AD_LINK $BASENAME_FILE".h5ad" + +get_data $H5AD_SC182_LINK $BASENAME_FILE"_sc182.h5ad" + +get_data $IFNB_CTRL_INT_LINK $IFNB_BASE_FILE"ctrl_norm_fvg.rds" +get_data $IFNB_STIM_INT_LINK $IFNB_BASE_FILE"stim_norm_fvg.rds" + +get_data $CLASSIFY_QUERY_LINK "Classify_query.rds" +get_data $CLASSIFY_RESULTS_ANCHORS_OBJECT_LINK "Classify_anchors.rds" +get_data $UMAP_RESULT_OBJECT_LINK "UMAP_result_integrated.rds" +
--- a/seurat_find_clusters.xml Wed Mar 13 12:51:22 2019 -0400 +++ b/seurat_find_clusters.xml Sun Oct 01 09:06:23 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="seurat_find_clusters" name="Seurat FindClusters" version="2.3.1+galaxy1"> +<tool id="seurat_find_clusters" name="Seurat FindClusters" profile="18.01" version="@SEURAT_VERSION@+galaxy0"> <description>find clusters of cells</description> <macros> <import>seurat_macros.xml</import> @@ -8,30 +8,10 @@ <command detect_errors="exit_code"><![CDATA[ seurat-find-clusters.R - --input-object-file '$input' - --output-object-file '$output' + @INPUT_OBJECT@ + @OUTPUT_OBJECT@ --output-text-file output_tab - #if $genes_use: - --genes-use '$genes_use' - #end if - - #if str($adv.reduction_type): - --reduction-type '$adv.reduction_type' - #end if - - #if str($adv.dims_use): - --dims-use \$(seq -s , 1 '$adv.dims_use') - #end if - - #if str($adv.k_num_clusters): - --k-param '$adv.k_num_clusters' - #end if - - #if str($adv.prune_snn): - --prune-snn '$adv.prune_snn' - #end if - #if str($adv.resolution): --resolution '$adv.resolution' #end if @@ -40,39 +20,75 @@ --algorithm '$adv.algorithm' #end if + #if str($adv.modularity_fxn): + --modularity-fxn '$adv.modularity_fxn' + #end if + + #if str($adv.method): + --method '$adv.method' + #end if + + #if str($adv.graph_name): + --graph-name '$adv.graph_name' + #end if + + #if str($adv.nrandom_starts): + --nrandom-starts '$adv.nrandom_starts' + #end if + + $adv.group_singletons + + + ## TODO add pdf support as optional ]]></command> <inputs> - <param name="input" argument="--input-object-file" type="data" format="rdata" label="Seurat RDS object" help="Seurat object produced by Seurat run PCA or other." 
/> - <expand macro="genes-use-input"/> + <expand macro="input_object_params"/> + <expand macro="output_object_params"/> <section name="adv" title="Advanced Options"> - <param name="reduction_type" argument="--reduction-type" optional="true" type="select" label="Dimensional reduction type" help="dimensional reduction technique to use in construction of SNN graph. (e.g. 'pca', 'ica'). PCA by default."> - <option value="pca" selected="true">PCA</option> - <option value="ica">ICA</option> - </param> - <expand macro="dims-use-input"/> - <param name="k_num_clusters" argument="--k-param" optional="true" type="integer" label="Number of clusters (k) to compute" help="Defines k for the k-nearest neighbor algorithm."/> - <param name="prune_snn" argument="--prune-snn" optional="true" type="float" label="Prune SNN cutoff" help="Sets the cutoff for acceptable Jaccard distances when computing the neighborhood overlap for the SNN construction. Any edges with values less than or equal to this will be set to 0 and removed from the SNN graph. Essentially sets the strigency of pruning (0 — no pruning, 1 — prune everything). Defaults to 1/15."/> <param name="resolution" argument="--resolution" optional="true" type="float" label="Resolution" help="Value of the resolution parameter, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities. 
Defaults to 0.8."/> <param name="algorithm" argument="--algorithm" optional="true" type="select" label="Modularity organization algorithm"> <option value="1" selected="true">Louvain</option> <option value="2">Louvain algorithm with multilevel refinement</option> <option value="3">SLM algorithm</option> + <option value="4">Leiden</option> </param> + <param name="modularity_fxn" argument="--modularity-fxn" optional="true" type="select" label="Modularity function"> + <option value="1" selected="true">Standard</option> + <option value="2">Alternative</option> + </param> + <param name="method" argument="--method" type="select" label="Method for Leiden" help="Method for Leiden (defaults to matrix which is fast for small datasets). Select iGraph to avoid casting large data to a dense matrix."> + <option value="matrix" selected="true">Matrix</option> + <option value="igraph">iGraph</option> + </param> + <param name="graph_name" argument="--graph-name" type="text" value="RNA_nn" label="Graph Name" help="Name of graph to use for the clustering algorithm."/> + <param name="nrandom_starts" argument="--nrandom-starts" type="integer" optional="true" label="Random starts" help="Number of random starts, 10 by default."/> + <param name="group_singletons" argument="--group-singletons" type="boolean" truevalue="--group-singletons" falsevalue="" checked="false" label="Group singletons" help="Group singletons into nearest cluster. 
If FALSE, assign all singletons to a 'singleton' group."/> + <param name="random_seed" argument="--random-seed" type="integer" optional="true" label="Random seed" help="Seed of the random number generator"/> </section> + </inputs> <outputs> <!-- <data name="out_pdf" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots" /> --> - <data name="output" format="rdata" from_work_dir="*.rds" label="${tool.name} on ${on_string}: Seurat RDS"/> + <expand macro="output_files"/> <data name="output_tab" format="csv" from_work_dir="output_tab" label="${tool.name} on ${on_string}: CSV Seurat Clusters"/> </outputs> <tests> <!-- Ensure count matrix input works --> <test> - <param name="input" ftype="rdata" value="out_runpca.rds"/> - <output name="output" ftype="rdata" value="out_findclust.rds" compare="sim_size"/> + <param name="rds_seurat_file" ftype="rdata" value="E-MTAB-6077-3k_features_90_cells-neighbours.rds"/> + <output name="rds_seurat_file" ftype="rdata" > + <assert_contents> + <has_size value="5064329" delta="200000"/> + </assert_contents> + </output> + <output name="output_tab" > + <assert_contents> + <has_n_lines n="92" /> + </assert_contents> + </output> </test> </tests> <help><![CDATA[ @@ -80,16 +96,14 @@ **What it does** -Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. -It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and -interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse -types of single cell data. +Identify clusters of cells by a shared nearest neighbor (SNN) modularity optimization +based clustering algorithm. First calculate k-nearest neighbors and construct +the SNN graph (using Seurat find neighbours). +Then optimize the modularity function to determine clusters. +For a full description of the algorithms, see Waltman and van Eck (2013) +The European Physical Journal B. 
-Seurat clustering use SNN method to determine different clusters in your dataset. In order to construct a -SNN graph, you must have perform a PCA before launch this tool (you can use Seurat dimensional reduction). -It will search k (30) nearest neighbors for each cells and link cells to each other if they shared the -same neighbors. You can modulate the resolution in order to get larger (resolution superior to 1) or smaller -(inferior to 1) clusters. +@SEURAT_INTRO@ -----
--- a/seurat_macros.xml Wed Mar 13 12:51:22 2019 -0400 +++ b/seurat_macros.xml Sun Oct 01 09:06:23 2023 +0000 @@ -1,39 +1,343 @@ <?xml version="1.0"?> <macros> - - <token name="@VERSION@">0.0.5</token> - + <token name="@VERSION@">4.0.0</token> + <token name="@SEURAT_VERSION@">4.0.4</token> <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">seurat-scripts</requirement> </requirements> </xml> - <xml name="version"> <version_command><![CDATA[ echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(seurat); cat(sessionInfo()\$otherPkgs\$seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ") ]]></version_command> </xml> + <xml name="input_object_params" token_multiple="False" token_varname="input" token_optional="False"> + <conditional name="@VARNAME@" label="Input format"> + <param type="select" name="format" label="Choose the format of the @VARNAME@" help="Seurat RDS, Seurat H5, Single Cell Experiment RDS, Loom or AnnData"> + <option value="rds_seurat" selected="true">RDS with a Seurat object</option> + <option value="loom">Loom</option> + <option value="h5seurat">Seurat HDF5</option> + <option value="anndata">AnnData</option> + <option value="rds_sce">RDS with a Single Cell Experiment object</option> + </param> + <when value="anndata"> + <param type="data" name="anndata_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="AnnData file" help="Select AnnData files for @VARNAME@" format="h5,h5ad"/> + </when> + <when value="loom"> + <param type="data" name="loom_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Loom file" help="Select Loom file(s) for @VARNAME@" format="h5,h5loom"/> + </when> + <when value="rds_seurat"> + <param type="data" name="rds_seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Seurat object for @VARNAME@" format="rdata"/> + </when> + <when value="rds_sce"> + <param type="data" name="rds_sce_file" 
multiple="@MULTIPLE@" optional="@OPTIONAL@" label="RDS file" help="Select RDS file(s) with Single Cell Experiment object for @VARNAME@" format="rdata"/> + </when> + <when value="h5seurat"> + <param type="data" name="h5seurat_file" multiple="@MULTIPLE@" optional="@OPTIONAL@" label="Seurat HDF5" help="Select Seurat HDF5 file(s) for @VARNAME@" format="h5"/> + </when> + </conditional> + </xml> + + <token name="@INPUT_OBJ_PREAMBLE@"> + #if $input.format == 'loom' + ln -s '$input.loom_file' input.loom; + #else if $input.format == 'h5seurat' + ln -s '$input.h5seurat_file' input.h5seurat; + #else if $input.format == 'anndata' + ## it complains when using links for AnnData... + cp '$input.anndata_file' input.h5ad; + #end if + </token> + + <token name="@INPUT_OBJECT@"> + #if $input.format == "anndata" + --input-object-file input.h5ad --input-format anndata + #else if $input.format == "loom" + --input-object-file input.loom --input-format loom + #else if $input.format == "rds_seurat" + --input-object-file '$input.rds_seurat_file' --input-format seurat + #else if $input.format == "rds_sce" + --input-object-file '$input.rds_sce_file' --input-format singlecellexperiment + #else if $input.format == "h5seurat" + --input-object-file input.h5seurat --input-format h5seurat + #end if + </token> + + <token name="@QUERY_OBJ_PREAMBLE@"> + #if $query.format == 'loom' + ln -s '$query.loom_file' query.loom; + #else if $query.format == 'h5seurat' + ln -s '$query.h5seurat_file' query.h5seurat; + #else if $query.format == 'anndata' + ## it complains when using links for AnnData... 
+ cp '$query.anndata_file' query.h5ad; + #end if + </token> + + <token name="@QUERY_OBJECT@"> + #if $query.format == "anndata" + --query-object-file query.h5ad --query-format anndata + #else if $query.format == "loom" + --query-object-file query.loom --query-format loom + #else if $query.format == "rds_seurat" + --query-object-file '$query.rds_seurat_file' --query-format seurat + #else if $query.format == "rds_sce" + --query-object-file '$query.rds_sce_file' --query-format singlecellexperiment + #else if $query.format == "h5seurat" + --query-object-file query.h5seurat --query-format h5seurat + #end if + </token> + + <token name="@ANCHORS_OBJ_PREAMBLE@"> + #if $anchors.format == 'loom' + ln -s '$anchors.loom_file' anchors.loom; + #else if $anchors.format == 'h5seurat' + ln -s '$anchors.h5seurat_file' anchors.h5seurat; + #else if $anchors.format == 'anndata' + ## it complains when using links for AnnData... + cp '$anchors.anndata_file' anchors.h5ad; + #end if + </token> + + <token name="@ANCHORS_OBJECT@"> + #if $anchors.format == "anndata" + --anchors-object-file anchors.h5ad --anchors-format anndata + #else if $anchors.format == "loom" + --anchors-object-file anchors.loom --anchors-format loom + #else if $anchors.format == "rds_seurat" + --anchors-object-file '$anchors.rds_seurat_file' --anchors-format seurat + #else if $anchors.format == "rds_sce" + --anchors-object-file '$anchors.rds_sce_file' --anchors-format singlecellexperiment + #else if $anchors.format == "h5seurat" + --anchors-object-file anchors.h5seurat --anchors-format h5seurat + #end if + </token> + + <token name="@REFERENCE_OBJ_PREAMBLE@"> + #if $reference.format == 'loom' + ln -s '$reference.loom_file' reference.loom; + #else if $reference.format == 'h5seurat' + ln -s '$reference.h5seurat_file' reference.h5seurat; + #else if $reference.format == 'anndata' + ## it complains when using links for AnnData... 
+ cp '$reference.anndata_file' reference.h5ad; + #end if + </token> + + <token name="@REFERENCE_OBJECT@"> + #if $reference.format == "anndata" + --reference-object-file reference.h5ad --reference-format anndata + #else if $reference.format == "loom" + --reference-object-file reference.loom --reference-format loom + #else if $reference.format == "rds_seurat" + --reference-object-file '$reference.rds_seurat_file' --reference-format seurat + #else if $reference.format == "rds_sce" + --reference-object-file '$reference.rds_sce_file' --reference-format singlecellexperiment + #else if $reference.format == "h5seurat" + --reference-object-file reference.h5seurat --reference-format h5seurat + #end if + </token> + + <token name="@INPUT_OBJS_PREAMBLE@"> + #if $input.format == 'loom' + #for $i, $fh in enumerate($input.loom_file): + ln -s '$fh' input.${i}.loom; + #end for + #else if $input.format == 'h5seurat' + #for $i, $fh in enumerate($input.h5seurat_file): + ln -s '$fh' input.${i}.h5seurat; + #end for + #else if $input.format == 'anndata' + ## it complains when using links for AnnData... 
+ #for $i, $fh in enumerate($input.anndata_file): + cp '$fh' input.${i}.h5ad; + #end for + #end if + </token> + + <token name="@INPUT_OBJECTS@"> + #if $input.format == "anndata" + --input-object-files + #set file_array = [ "input."+str($i)+".h5ad" for $i, $fh in enumerate($input.anndata_file)] + #set files = ",".join($file_array) + ${files} + --input-format anndata + #else if $input.format == "loom" + --input-object-files + #set file_array = [ "input."+str($i)+".loom" for $i, $fh in enumerate($input.loom_file)] + #set files = ",".join($file_array) + ${files} + --input-format loom + #else if $input.format == "rds_seurat" + --input-object-files + #set file_array = $input.rds_seurat_file + #set files = ",".join([ str($fh) for $fh in $file_array ]) + ${files} + --input-format seurat + #else if $input.format == "rds_sce" + --input-object-files + #set file_array = $input.rds_sce_file + #set files = ",".join([ str($fh) for $fh in $file_array ]) + ${files} + --input-format singlecellexperiment + #else if $input.format == "h5seurat" + --input-object-files + #set file_array = [ "input."+str($i)+".h5seurat" for $i, $fh in enumerate($input.h5seurat)] + #set files = ",".join($file_array) + ${files} + --input-format h5seurat + #end if + </token> + + <token name="@REFERENCE_OBJS_PREAMBLE@"> + #if $reference.format == 'loom' + #for $i, $fh in enumerate($reference.loom_file): + ln -s '$fh' reference.${i}.loom; + #end for + #else if $reference.format == 'h5seurat' + #for $i, $fh in enumerate($reference.h5seurat_file): + ln -s '$fh' reference.${i}.h5seurat; + #end for + #else if $reference.format == 'anndata' + ## it complains when using links for AnnData... 
+ #for $i, $fh in enumerate($reference.anndata_file): + cp '$fh' reference.${i}.h5ad; + #end for + #end if + </token> + + <token name="@REFERENCE_OBJECTS@"> + #if $reference.format == "anndata" and $reference.anndata_file: + --reference-object-files + #set file_array = [ "reference."+str($i)+".h5ad" for $i, $fh in enumerate($reference.anndata_file)] + #set files = ",".join($file_array) + ${files} + --reference-format anndata + #else if $reference.format == "loom" and $reference.loom_file: + --reference-object-files + #set file_array = [ "reference."+str($i)+".loom" for $i, $fh in enumerate($reference.loom_file)] + #set files = ",".join($file_array) + ${files} + --reference-format loom + #else if $reference.format == "rds_seurat" and $reference.rds_seurat_file: + --reference-object-files + #set files = ",".join([ str($fh) for $fh in $reference.rds_seurat_file ]) + ${files} + --reference-format seurat + #else if $reference.format == "rds_sce" and $reference.rds_sce_file: + --reference-object-files + #set files = ",".join([ str($fh) for $fh in $reference.rds_sce_file ]) + ${files} + --reference-format singlecellexperiment + #else if $reference.format == "h5seurat" and $reference.h5seurat: + --reference-object-files + #set file_array = [ "reference."+str($i)+".h5seurat" for $i, $fh in enumerate($reference.h5seurat)] + #set files = ",".join($file_array) + ${files} + --reference-format h5seurat + #end if + </token> + + <xml name="output_object_params"> + <param type="select" name="format" label="Choose the format of the output" help="Seurat, Single Cell Experiment, AnnData or Loom"> + <option value="rds_seurat" selected="true">RDS with a Seurat object</option> + <option value="anndata">AnnData written by Seurat</option> + <option value="loom">Loom</option> + <option value="rds_sce">RDS with a Single Cell Experiment object</option> + </param> + </xml> + + <xml name="output_files"> + <data name="loom_file" from_work_dir="seurat_obj.loom" format="h5" label="${tool.name} on 
${on_string}: Seurat Loom"> + <filter>format == 'loom'</filter> + </data> + <data name="rds_seurat_file" format="rdata" label="${tool.name} on ${on_string}: Seurat RDS"> + <filter>format == 'rds_seurat'</filter> + </data> + <data name="anndata_file" format="h5ad" label="${tool.name} on ${on_string}: AnnData from Seurat"> + <filter>format == 'anndata'</filter> + </data> + <data name="rds_sce_file" format="rdata" label="${tool.name} on ${on_string}: Seurat Single Cell Experiment RDS"> + <filter>format == 'rds_sce'</filter> + </data> + </xml> + + <token name="@OUTPUT_OBJECT@"> + #if $format == "anndata" + --output-object-file '$anndata_file' --output-format anndata + #else if $format == "loom" + --output-object-file seurat_obj.loom --output-format loom + #else if $format == "rds_seurat" + --output-object-file '$rds_seurat_file' --output-format seurat + #else if $format == "rds_sce" + --output-object-file '$rds_sce_file' --output-format singlecellexperiment + #end if + </token> + + <xml name="plot_output_files_format" token_format="png"> + <data label="Seurat ${plot_type.plot_type_selector} on ${on_string}: @FORMAT@ plot" name="plot_out_@FORMAT@" format='@FORMAT@' > + <filter>plot_format == '@FORMAT@'</filter> + </data> + </xml> + + <token name="@OUTPUT_PLOT@"> + #if $plot_format == "png" + --plot-out '$plot_out_png' + #else if $plot_format == "pdf" + --plot-out '$plot_out_pdf' + #else if $plot_format == "eps" + --plot-out '$plot_out_eps' + #else if $plot_format == "ps" + --plot-out '$plot_out_ps' + #else if $plot_format == "jpg" + --plot-out '$plot_out_jpg' + #else if $plot_format == "tiff" + --plot-out '$plot_out_tiff' + #else if $plot_format == "svg" + --plot-out '$plot_out_svg' + #end if + </token> + <xml name="genes-use-input"> - <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv, txt" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a 
dimensionally reduced representation (i.e. PCs)."/> + <param name="genes_use" argument="--genes-use" optional="true" type="data" format="tsv,txt,tabular" label="Genes to use" help="A file with gene names to use in construction of SNN graph if building directly based on expression data rather than a dimensionally reduced representation (i.e. PCs)."/> </xml> <xml name="dims-use-input"> <param name="dims_use" argument="--dims-use" min="1" optional="true" type="integer" label="PCA Dimensions to use" help="Number of PCs (dimensions) to use in construction of the SNN graph."/> </xml> + <token name="@SEURAT_INTRO@"><![CDATA[ +Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. +It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and +interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse +types of single cell data. + ]]></token> + <token name="@VERSION_HISTORY@"><![CDATA[ **Version history** +4.0.0: Moves to Seurat 4.0.0, introducing a number of methods for merging datasets, plus the whole suite of Seurat plots. Pablo Moreno with funding from AstraZeneca. -0.0.1: Initial contribution. Maria Doyle, https://github.com/mblue9. +3.2.3+galaxy0: Moves to Seurat 3.2.3 and introduce convert method, improving format interconversion support. + +3.1.2_0.0.8: Update metadata parsing + +3.1.1_0.0.7: Exposes perplexity and enables tab input. + +3.1.1_0.0.6+galaxy0: Moved to Seurat 3. + + Find clusters: removed dims-use, k-param, prune-snn. 2.3.1+galaxy0: Improved documentation and further exposition of all script's options. Pablo Moreno, Jonathan Manning and Ni Huang, Expression Atlas team https://www.ebi.ac.uk/gxa/home at -EMBL-EBI https://www.ebi.ac.uk/. Parts obtained from wrappers from Christophe Antoniewski(https://github.com/drosofff) and Lea Bellenger(https://github.com/bellenger-l). +EMBL-EBI https://www.ebi.ac.uk/. 
Parts obtained from wrappers from Christophe Antoniewski (GitHub drosofff) and Lea Bellenger (GitHub bellenger-l). + +0.0.1: Initial contribution. Maria Doyle (GitHub mblue9). ]]></token> <xml name="citations"> <citations> + <citation type="doi">10.1038/s41592-021-01102-w</citation> <citation type="doi">10.1038/nbt.4096</citation> <citation type="bibtex"> @misc{r-seurat-scripts.git,