Mercurial > repos > ebi-gxa > retrieve_scxa
changeset 6:6990f58793ce draft default tip
"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 7ce9472049aa8e509049c4b9e15afd24f86c1b7d"
author | ebi-gxa |
---|---|
date | Wed, 04 Aug 2021 16:57:29 +0000 |
parents | 7fc311b62935 |
children | |
files | atlas-retrieve-macros.xml retrieve-scxa.xml |
diffstat | 2 files changed, 171 insertions(+), 64 deletions(-) [+] |
line wrap: on
line diff
--- a/atlas-retrieve-macros.xml Thu Sep 03 09:19:07 2020 +0000 +++ b/atlas-retrieve-macros.xml Wed Aug 04 16:57:29 2021 +0000 @@ -1,10 +1,10 @@ <macros> - <token name="@TOOL_VERSION@">1.0.1</token> + <token name="@TOOL_VERSION@">1.0.3</token> <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token> - <token name="@PROFILE@">18.01</token> + <token name="@PROFILE@">20.01</token> <xml name="requirements"> <requirements> - <requirement type="package" version="0.0.11">atlas-data-import</requirement> + <requirement type="package" version="0.1.1">atlas-data-import</requirement> <yield/> </requirements> </xml> @@ -15,6 +15,7 @@ </xml> <token name="@VERSION_HISTORY@"><![CDATA[ **Version history** +1.0.2+galaxy0: Update downloader parameters and keep a single tool to import both expression data and classifiers. 0.0.6+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/. ]]></token> <xml name="citations"> @@ -32,4 +33,4 @@ <yield /> </citations> </xml> -</macros> +</macros> \ No newline at end of file
--- a/retrieve-scxa.xml Thu Sep 03 09:19:07 2020 +0000 +++ b/retrieve-scxa.xml Wed Aug 04 16:57:29 2021 +0000 @@ -1,78 +1,178 @@ -<tool id="retrieve_scxa" name="Atlas import: get experiment data" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@"> +<?xml version='1.0' encoding='utf-8'?> +<tool id='retrieve_scxa' name='Atlas import: get experiment data' version='@TOOL_VERSION@+galaxy0' profile='@PROFILE@'> <description>Retrieve expression matrices and metadata from EBI Single Cell Expression Atlas (SCXA)</description> <macros> <import>atlas-retrieve-macros.xml</import> </macros> - <expand macro="requirements" /> - <command detect_errors="exit_code"><![CDATA[ - ln -s "${accession_code}_${matrix_type}/10x_data/matrix.mtx" matrix.mtx && - ln -s "${accession_code}_${matrix_type}/10x_data/genes.tsv" genes.tsv && - ln -s "${accession_code}_${matrix_type}/10x_data/barcodes.tsv" barcodes.tsv && - ln -s "${accession_code}_${matrix_type}/sdrf.txt" sdrf.txt && - ln -s "${accession_code}_${matrix_type}/condensed-sdrf.tsv" condensed-sdrf.tsv && - ln -s "${accession_code}_${matrix_type}/idf.txt" idf.txt && - ln -s "${accession_code}_${matrix_type}/marker_genes_${number_of_clusters}.tsv" marker_genes_${number_of_clusters}.tsv && - ln -s "${accession_code}_${matrix_type}/exp_design.tsv" exp_design.tsv && + <expand macro='requirements' /> + <command detect_errors='exit_code'><![CDATA[ + #if $expression_data_params.get_expression_data + get_experiment_data.R --accession-code '${accession_code}' --get-expression-data '${expression_data_params.get_expression_data}' --matrix-type '${expression_data_params.matrix_type}' --get-marker-genes 'TRUE' --markers-cell-grouping '${expression_data_params.markers_cell_grouping}' && - get_experiment_data.R --accesssion-code "${accession_code}" --matrix-type "${matrix_type}" --get-sdrf "${get_sdrf}" --get-condensed-sdrf "${get_condensed_sdrf}" --get-marker-genes "${get_marker_genes}" + mv 
'${accession_code}_${expression_data_params.matrix_type}/10x_data/matrix.mtx' ${expr_mtx} && + mv '${accession_code}_${expression_data_params.matrix_type}/10x_data/genes.tsv' ${genes} && + mv '${accession_code}_${expression_data_params.matrix_type}/10x_data/barcodes.tsv' ${barcodes} && + mv '${accession_code}_${expression_data_params.matrix_type}/marker_genes_${expression_data_params.markers_cell_grouping}.tsv' ${marker_genes} && + #end if + + #if $metadata_params.get_metadata + get_experiment_data.R --accession-code '${accession_code}' --get-expression-data 'FALSE' --matrix-type '${metadata_params.matrix_type}' --get-sdrf 'TRUE' --get-condensed-sdrf 'TRUE' --get-idf 'TRUE' --get-exp-design 'TRUE' && - #if $config_file - --config-file "${config_file}" - #end if - #if $get_exp_design - --get-exp-design "${get_exp_design}" - #end if - #if $decorated_rows - --decorated-rows "${decorated_rows}" + mv '${accession_code}_${metadata_params.matrix_type}/sdrf.txt' ${sdrf} && + mv '${accession_code}_${metadata_params.matrix_type}/condensed-sdrf.tsv' ${condensed_sdrf} && + mv '${accession_code}_${metadata_params.matrix_type}/idf.txt' ${idf} && + mv '${accession_code}_${metadata_params.matrix_type}/exp_design.tsv' ${exp_design} && #end if - #if $use_default_expr_names - --use-default-expr-names "${use_default_expr_names}" + + #if $classifier_params.get_classifiers + import_classification_data.R --tool '${classifier_params.tool}' --species '${classifier_params.species}' --get-sdrf --condensed-sdrf --get-tool-perf-table + + #if $classifier_params.classifier_accession_code + --accession-code '${classifier_params.classifier_accession_code}' + #end if + ; #end if - #if $get_idf - --get-idf "${get_idf}" - #end if - #if $number_of_clusters - --number-of-clusters "${number_of_clusters}" - #end if + echo 'DONE' ]]></command> <inputs> - <param type="text" name="accession_code" label="SC-Atlas experiment accession" value="E-GEOD-100058" help="EBI Single Cell Atlas accession for the 
experiment that you want to retrieve." /> - <param type="select" name="matrix_type" label="Choose the type of matrix to download" help="Type of matrix to be imported"> - <option value="RAW">Raw</option> - <option value="FILTERED">Filtered Counts</option> - <option value="TPM">TPM-normalised</option> - <option value="CPM">CPM-normalised</option> - </param> - <param type="boolean" name="get_sdrf" checked="false" label="Import SDRF file" help="Boolean indicating whether SDRF file needs to be imported" /> - <param type="boolean" name="get_exp_design" checked="false" label="Import experiment design file" help="Boolean indicating whether experiment design file needs to be imported" /> - <param type="boolean" name="get_idf" checked="false" label="Import IDF file" help="Boolean indicating whether IDF file needs to be imported" /> - <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed SDRF file" help="Boolean indicating whether condensed SDRF file needs to be imported" /> - <param type="boolean" name="get_marker_genes" checked="false" label="Import marker genes" help="Boolean indicating whether marker genes should be imported" /> - <param type="data" name="config_file" label="Config file" optional="true" format="yml" help="Config file with user-provided parameters" /> - <param type="boolean" name="decorated_rows" checked="false" label="Decorated rows" help="Boolean indicating whether a decorated version of the rows should be imported" /> - <param type="boolean" name="use_default_expr_names" checked="false" label="Use default expr names" help="Should default (non 10x-type) file names be used for expression data? Default: FALSE" /> - <param type="integer" name="number_of_clusters" value="0" label="Number of clusters" help="Number of clusters in marker genes file" /> + <param type='text' name='accession_code' label='SC-Atlas experiment accession' help='EBI Single Cell Atlas accession for the experiment that you want to retrieve.' 
/> + <conditional name='expression_data_params'> + <param name='get_expression_data' type='boolean' checked='false' label='Get Expression Data' help='If specified, expression data will be imported'/> + <when value='true'> + <param type='select' name='matrix_type' label='Choose the type of matrix to download' help='Type of matrix to be imported'> + <option value='RAW'>Raw Counts</option> + <option value='FILTERED'>Filtered Counts</option> + <option value='TPM'>TPM-normalised</option> + <option value='CPM'>CPM-normalised</option> + </param> + <param type='text' name='markers_cell_grouping' label='Markers Cell Grouping' value='inferred_cell_type_-_ontology_labels' help='What cell grouping should be used for marker genes? By default, marker genes for inferred cell types (ontology labels) are imported. When providing an integer value, marker genes for a corresponding number of clusters will be imported.' /> + </when> + </conditional> + <conditional name='metadata_params'> + <param name='get_metadata' type='boolean' checked='false' label='Get Metadata' help='If specified, metadata for given experiment will be imported'/> + <when value='true'> + <param name='matrix_type' type='hidden' value='CPM' /> + </when> + </conditional> + <conditional name='classifier_params'> + <param name='get_classifiers' type='boolean' checked='false' label='Import Classifiers' help='If specified, classifiers for a range of datasets will be imported alongside corresponding SDRF files and a tool performance table.' /> + <when value='true'> + <param type='text' name='tool' label='Tool' help='For which tool should the classifiers be imported?' 
/> + <param type='select' name='species' label='Choose species' help='Choose species for which to download classifiers'> + <option value='homo_sapiens'>Homo Sapiens</option> + <option value='mus_musculus'>Mus Musculus</option> + </param> + <param type='text' name='classifier_accession_code' label='SC-Atlas Classifier Accession(s)' optional='true' help='EBI Single Cell Atlas accession (or comma-separated string) for the experiment(s) which classifiers you want to retrieve. By default, all classifiers are imported.' /> + </when> + </conditional> </inputs> <outputs> - <data name="expr_mtx" format="txt" from_work_dir="matrix.mtx" label="${tool.name} on ${on_string} ${accession_code} matrix.mtx (${matrix_type.value_label})" /> - <data name="barcodes" format="txt" from_work_dir="barcodes.tsv" label="${tool.name} on ${on_string} ${accession_code} barcodes.tsv (${matrix_type.value_label})" /> - <data name="genes" format="txt" from_work_dir="genes.tsv" label="${tool.name} on ${on_string} ${accession_code} genes.tsv (${matrix_type.value_label})" /> - <data name="sdrf" format="txt" from_work_dir="sdrf.txt" label="${tool.name} on ${on_string} ${accession_code} sdrf.txt (${matrix_type.value_label})" > - <filter>get_sdrf</filter> + <data name='expr_mtx' format='txt' label='${tool.name} on ${on_string} ${accession_code} matrix.mtx (${expression_data_params.matrix_type.value_label})'> + <filter>expression_data_params['get_expression_data']</filter> + </data> + <data name='barcodes' format='txt' label='${tool.name} on ${on_string} ${accession_code} barcodes.tsv (${expression_data_params.matrix_type.value_label})'> + <filter>expression_data_params['get_expression_data']</filter> </data> - <data name="condensed_sdrf" format="txt" from_work_dir="condensed-sdrf.tsv" label="${tool.name} on ${on_string} ${accession_code} condensed-sdrf.tsv (${matrix_type.value_label})" > - <filter>get_condensed_sdrf</filter> + <data name='genes' format='txt' label='${tool.name} on ${on_string} 
${accession_code} genes.tsv (${expression_data_params.matrix_type.value_label})'> + <filter>expression_data_params['get_expression_data']</filter> + </data> + <data name='marker_genes' format='tsv' label='${tool.name} on ${on_string} ${accession_code} ${accession_code}.marker_genes_${expression_data_params.markers_cell_grouping}.tsv'> + <filter>expression_data_params['get_expression_data']</filter> + </data> + <data name='sdrf' format='txt' label='${tool.name} on ${on_string} ${accession_code} sdrf.txt' > + <filter>metadata_params['get_metadata']</filter> </data> - <data name="idf" format="txt" from_work_dir="idf.txt" label="${tool.name} on ${on_string} ${accession_code} idf.txt (${matrix_type.value_label})"> - <filter>get_idf</filter> + <data name='condensed_sdrf' format='txt' label='${tool.name} on ${on_string} ${accession_code} condensed-sdrf.tsv' > + <filter>metadata_params['get_metadata']</filter> + </data> + <data name='idf' format='txt' label='${tool.name} on ${on_string} ${accession_code} idf.txt'> + <filter>metadata_params['get_metadata']</filter> + </data> + <data name='exp_design' format='txt' label='${tool.name} on ${on_string} ${accession_code} experiment_design.tsv'> + <filter>metadata_params['get_metadata']</filter> </data> - <data name="marker_genes" from_work_dir="marker_genes_${number_of_clusters}.tsv" format="txt" > - <filter>get_marker_genes</filter> - </data> - <data name="exp_design" from_work_dir="exp_design.tsv" format="txt" > - <filter>get_exp_design</filter> + <collection name='imported_classifiers' type='list' format="rdata" label='Collection of imported classifiers'> + <discover_datasets pattern='__name__' directory='imported_classifiers' /> + <filter>classifier_params['get_classifiers']</filter> + </collection> + <collection name='imported_sdrfs' type='list' label='Collection of imported SDRF files'> + <discover_datasets pattern='__name_and_ext__' directory='imported_SDRFs' /> + <filter>classifier_params['get_classifiers']</filter> + 
</collection> + <data name='tool_perf_table' from_work_dir="tool_perf_pvals.tsv" format='tsv' label='Tool performance table'> + <filter>classifier_params['get_classifiers']</filter> </data> </outputs> + <tests> + <test> + <param name="expression_data_params|get_expression_data" value="true" /> + <param name="metadata_params|get_metadata" value="true" /> + <param name="classifier_params|get_classifiers" value="true" /> + <param name="accession_code" value="E-MTAB-7249" /> + <param name="expression_data_params|matrix_type" value="CPM" /> + <param name="classifier_params|tool" value="scpred" /> + <param name="classifier_params|classifier_accession_code" value="E-MTAB-7249" /> + <output name="expr_mtx"> + <assert_contents> + <has_line_matching expression="%%MatrixMarket.*" /> + </assert_contents> + </output> + <output name="barcodes"> + <assert_contents> + <has_line_matching expression="ERR.*" /> + </assert_contents> + </output> + <output name="genes"> + <assert_contents> + <has_line_matching expression="ENSG000000.*" /> + </assert_contents> + </output> + <output name="marker_genes"> + <assert_contents> + <has_text text="pvals_adj" /> + </assert_contents> + </output> + <output name="sdrf"> + <assert_contents> + <has_text text="Characteristics[organism]" /> + </assert_contents> + </output> + <output name="condensed_sdrf"> + <assert_contents> + <has_text text="characteristic" /> + </assert_contents> + </output> + <output name="idf"> + <assert_contents> + <has_text text="Comment[Submitted Name]" /> + </assert_contents> + </output> + <output name="exp_design"> + <assert_contents> + <has_text text="Sample Characteristic[organism]" /> + </assert_contents> + </output> + <output name="tool_perf_table"> + <assert_contents> + <has_text text="Tool" /> + </assert_contents> + </output> + <output_collection name="imported_classifiers" type="list"> + <element name="E-MTAB-7249_scpred.rds"> + <assert_contents> + <has_size value="976000" delta="500000" /> + </assert_contents> + 
</element> + </output_collection> + <output_collection name="imported_sdrfs" type="list"> + <element name="E-MTAB-7249.condensed-sdrf.tsv"> + <assert_contents> + <has_text text="characteristic" /> + </assert_contents> + </element> + </output_collection> + </test> + </tests> <help><![CDATA[ ================================================================================= Gene expression analysis in single cells across species and biological conditions @@ -81,7 +181,7 @@ Single Cell Expression Atlas supports research in single cell transcriptomics. The Atlas annotates publicly available single cell RNA-Seq experiments with ontology identifiers and re-analyses them using standardised pipelines available -through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of +throrugh iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of clusters of cells, their annotations and supports searches for gene expression within and across studies. @@ -146,8 +246,14 @@ :Marker gene file (txt): File containing information on marker genes that differentiate cell types present in the sequencing experiment. +:Classifiers (collection): + Collection of pre-trained classifiers for specified tool/dataset combination. + +:SDRF files for classifiers (collection): + Collection of SDRF files for imported classifiers (convenient output for downstream processes) + @HELP@ @VERSION_HISTORY@ ]]></help> - <expand macro="citations" /> + <expand macro='citations' /> </tool>