changeset 6:6990f58793ce draft default tip

"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 7ce9472049aa8e509049c4b9e15afd24f86c1b7d"
author ebi-gxa
date Wed, 04 Aug 2021 16:57:29 +0000
parents 7fc311b62935
children
files atlas-retrieve-macros.xml retrieve-scxa.xml
diffstat 2 files changed, 171 insertions(+), 64 deletions(-) [+]
line wrap: on
line diff
--- a/atlas-retrieve-macros.xml	Thu Sep 03 09:19:07 2020 +0000
+++ b/atlas-retrieve-macros.xml	Wed Aug 04 16:57:29 2021 +0000
@@ -1,10 +1,10 @@
 <macros>
-    <token name="@TOOL_VERSION@">1.0.1</token>
+    <token name="@TOOL_VERSION@">1.0.3</token>
     <token name="@HELP@">More information can be found at https://github.com/ebi-gene-expression-group/atlas-data-import</token>
-    <token name="@PROFILE@">18.01</token>
+    <token name="@PROFILE@">20.01</token>
     <xml name="requirements">
       <requirements>
-        <requirement type="package" version="0.0.11">atlas-data-import</requirement>
+        <requirement type="package" version="0.1.1">atlas-data-import</requirement>
             <yield/>
       </requirements>
     </xml>
@@ -15,6 +15,7 @@
     </xml>
     <token name="@VERSION_HISTORY@"><![CDATA[
 **Version history**
+1.0.2+galaxy0: Update downloader parameters and keep a single tool to import both expression data and classifiers.
 0.0.6+galaxy0: Initial contribution. Andrey Solovyev, Expression Atlas team https://www.ebi.ac.uk/gxa/home at EMBL-EBI https://www.ebi.ac.uk/.
     ]]></token>
     <xml name="citations">
@@ -32,4 +33,4 @@
         <yield />
       </citations>
     </xml>
-</macros>
+</macros>
\ No newline at end of file
--- a/retrieve-scxa.xml	Thu Sep 03 09:19:07 2020 +0000
+++ b/retrieve-scxa.xml	Wed Aug 04 16:57:29 2021 +0000
@@ -1,78 +1,178 @@
-<tool id="retrieve_scxa" name="Atlas import: get experiment data" version="@TOOL_VERSION@+galaxy0"  profile="@PROFILE@">
+<?xml version='1.0' encoding='utf-8'?>
+<tool id='retrieve_scxa' name='Atlas import: get experiment data' version='@TOOL_VERSION@+galaxy0'  profile='@PROFILE@'>
     <description>Retrieve expression matrices and metadata from EBI Single Cell Expression Atlas (SCXA)</description>
     <macros>
          <import>atlas-retrieve-macros.xml</import>
     </macros>
-    <expand macro="requirements" />
-    <command detect_errors="exit_code"><![CDATA[
-        ln -s "${accession_code}_${matrix_type}/10x_data/matrix.mtx" matrix.mtx &&
-        ln -s "${accession_code}_${matrix_type}/10x_data/genes.tsv" genes.tsv &&
-        ln -s "${accession_code}_${matrix_type}/10x_data/barcodes.tsv" barcodes.tsv &&
-        ln -s "${accession_code}_${matrix_type}/sdrf.txt" sdrf.txt &&
-        ln -s "${accession_code}_${matrix_type}/condensed-sdrf.tsv" condensed-sdrf.tsv &&
-        ln -s "${accession_code}_${matrix_type}/idf.txt" idf.txt &&
-        ln -s "${accession_code}_${matrix_type}/marker_genes_${number_of_clusters}.tsv" marker_genes_${number_of_clusters}.tsv &&
-        ln -s "${accession_code}_${matrix_type}/exp_design.tsv" exp_design.tsv &&
+    <expand macro='requirements' />
+    <command detect_errors='exit_code'><![CDATA[
+        #if $expression_data_params.get_expression_data
+          get_experiment_data.R --accession-code '${accession_code}' --get-expression-data '${expression_data_params.get_expression_data}' --matrix-type '${expression_data_params.matrix_type}' --get-marker-genes 'TRUE' --markers-cell-grouping '${expression_data_params.markers_cell_grouping}' &&
 
-        get_experiment_data.R --accesssion-code "${accession_code}" --matrix-type "${matrix_type}" --get-sdrf "${get_sdrf}" --get-condensed-sdrf "${get_condensed_sdrf}" --get-marker-genes "${get_marker_genes}"
+          mv '${accession_code}_${expression_data_params.matrix_type}/10x_data/matrix.mtx' ${expr_mtx} &&
+          mv '${accession_code}_${expression_data_params.matrix_type}/10x_data/genes.tsv' ${genes} &&
+          mv '${accession_code}_${expression_data_params.matrix_type}/10x_data/barcodes.tsv' ${barcodes} &&
+          mv '${accession_code}_${expression_data_params.matrix_type}/marker_genes_${expression_data_params.markers_cell_grouping}.tsv' ${marker_genes} &&
+        #end if 
+
+        #if $metadata_params.get_metadata
+          get_experiment_data.R --accession-code '${accession_code}' --get-expression-data 'FALSE' --matrix-type '${metadata_params.matrix_type}' --get-sdrf 'TRUE' --get-condensed-sdrf 'TRUE' --get-idf 'TRUE' --get-exp-design 'TRUE' &&
 
-        #if $config_file 
-        --config-file "${config_file}"
-        #end if        
-        #if $get_exp_design
-        --get-exp-design "${get_exp_design}" 
-        #end if 
-        #if $decorated_rows 
-        --decorated-rows "${decorated_rows}" 
+          mv '${accession_code}_${metadata_params.matrix_type}/sdrf.txt' ${sdrf} &&
+          mv '${accession_code}_${metadata_params.matrix_type}/condensed-sdrf.tsv' ${condensed_sdrf} &&
+          mv '${accession_code}_${metadata_params.matrix_type}/idf.txt' ${idf} &&
+          mv '${accession_code}_${metadata_params.matrix_type}/exp_design.tsv' ${exp_design} &&
         #end if
-        #if $use_default_expr_names 
-        --use-default-expr-names "${use_default_expr_names}" 
+
+        #if $classifier_params.get_classifiers
+          ## Import pre-trained classifiers, their SDRF files and the tool performance table; the trailing && (rather than ;) lets a failed import fail the job instead of being masked by the final echo.
+          import_classification_data.R --tool '${classifier_params.tool}' --species '${classifier_params.species}'  --get-sdrf --condensed-sdrf --get-tool-perf-table
+          #if $classifier_params.classifier_accession_code
+            --accession-code '${classifier_params.classifier_accession_code}'
+          #end if
+          &&
         #end if
-        #if $get_idf 
-        --get-idf "${get_idf}" 
-        #end if
-        #if $number_of_clusters 
-        --number-of-clusters  "${number_of_clusters}" 
-        #end if
+        echo 'DONE'
     ]]></command>
     <inputs>
-        <param type="text" name="accession_code" label="SC-Atlas experiment accession" value="E-GEOD-100058" help="EBI Single Cell Atlas accession for the experiment that you want to retrieve." />
-        <param type="select" name="matrix_type" label="Choose the type of matrix to download" help="Type of matrix to be imported">
-            <option value="RAW">Raw</option>
-            <option value="FILTERED">Filtered Counts</option>
-            <option value="TPM">TPM-normalised</option>
-            <option value="CPM">CPM-normalised</option>
-        </param>
-        <param type="boolean" name="get_sdrf" checked="false" label="Import SDRF file" help="Boolean indicating whether SDRF file needs to be imported" />
-        <param type="boolean" name="get_exp_design" checked="false" label="Import experiment design file" help="Boolean indicating whether experiment design file needs to be imported" />
-        <param type="boolean" name="get_idf" checked="false" label="Import IDF file" help="Boolean indicating whether IDF file needs to be imported" />
-        <param type="boolean" name="get_condensed_sdrf" checked="false" label="Get condensed SDRF file" help="Boolean indicating whether condensed SDRF file needs to be imported" />
-        <param type="boolean" name="get_marker_genes" checked="false" label="Import marker genes" help="Boolean indicating whether marker genes should be imported" />
-        <param type="data" name="config_file" label="Config file" optional="true" format="yml" help="Config file with user-provided parameters" />
-        <param type="boolean" name="decorated_rows" checked="false" label="Decorated rows" help="Boolean indicating whether a decorated version of the rows should be imported" />
-        <param type="boolean" name="use_default_expr_names" checked="false"  label="Use default expr names" help="Should default (non 10x-type) file names be used for expression data? Default: FALSE" />
-        <param type="integer" name="number_of_clusters" value="0" label="Number of clusters" help="Number of clusters in marker genes file" />
+      <param type='text' name='accession_code' label='SC-Atlas experiment accession' help='EBI Single Cell Atlas accession for the experiment that you want to retrieve.' />
+      <conditional name='expression_data_params'>
+        <param name='get_expression_data' type='boolean' checked='false' label='Get Expression Data' help='If specified, expression data will be imported'/>
+        <when value='true'>
+          <param type='select' name='matrix_type' label='Choose the type of matrix to download' help='Type of matrix to be imported'>
+              <option value='RAW'>Raw Counts</option>
+              <option value='FILTERED'>Filtered Counts</option>
+              <option value='TPM'>TPM-normalised</option>
+              <option value='CPM'>CPM-normalised</option>
+          </param>
+          <param type='text' name='markers_cell_grouping' label='Markers Cell Grouping' value='inferred_cell_type_-_ontology_labels' help='What cell grouping should be used for marker genes? By default, marker genes for inferred cell types (ontology labels) are imported. When providing an integer value, marker genes for a corresponding number of clusters will be imported.' />
+        </when>
+      </conditional>
+      <conditional name='metadata_params'>
+        <param name='get_metadata' type='boolean' checked='false' label='Get Metadata' help='If specified, metadata for given experiment will be imported'/>
+        <when value='true'>
+          <param name='matrix_type' type='hidden' value='CPM' />
+        </when>
+      </conditional>
+      <conditional name='classifier_params'>
+        <param name='get_classifiers' type='boolean' checked='false' label='Import Classifiers' help='If specified, classifiers for a range of datasets will be imported alongside corresponding SDRF files and a tool performance table.' /> 
+        <when value='true'>
+          <param type='text' name='tool' label='Tool' help='For which tool should the classifiers be imported?' />
+          <param type='select' name='species' label='Choose species' help='Choose species for which to download classifiers'>
+              <option value='homo_sapiens'>Homo Sapiens</option>
+              <option value='mus_musculus'>Mus Musculus</option>
+          </param>
+          <param type='text' name='classifier_accession_code' label='SC-Atlas Classifier Accession(s)' optional='true' help='EBI Single Cell Atlas accession (or comma-separated string) for the experiment(s) which classifiers you want to retrieve. By default, all classifiers are imported.' />
+        </when>
+      </conditional>
     </inputs>
     <outputs>
-        <data name="expr_mtx" format="txt" from_work_dir="matrix.mtx" label="${tool.name} on ${on_string} ${accession_code} matrix.mtx (${matrix_type.value_label})" />
-        <data name="barcodes" format="txt" from_work_dir="barcodes.tsv" label="${tool.name} on ${on_string} ${accession_code} barcodes.tsv (${matrix_type.value_label})" />
-        <data name="genes" format="txt" from_work_dir="genes.tsv" label="${tool.name} on ${on_string} ${accession_code} genes.tsv (${matrix_type.value_label})" />
-        <data name="sdrf" format="txt" from_work_dir="sdrf.txt" label="${tool.name} on ${on_string} ${accession_code} sdrf.txt (${matrix_type.value_label})" >
-            <filter>get_sdrf</filter>
+        <data name='expr_mtx' format='txt' label='${tool.name} on ${on_string} ${accession_code} matrix.mtx (${expression_data_params.matrix_type.value_label})'>
+          <filter>expression_data_params['get_expression_data']</filter>
+        </data>
+        <data name='barcodes' format='txt' label='${tool.name} on ${on_string} ${accession_code} barcodes.tsv (${expression_data_params.matrix_type.value_label})'>
+          <filter>expression_data_params['get_expression_data']</filter>
         </data>
-        <data name="condensed_sdrf" format="txt" from_work_dir="condensed-sdrf.tsv" label="${tool.name} on ${on_string} ${accession_code} condensed-sdrf.tsv (${matrix_type.value_label})" >
-            <filter>get_condensed_sdrf</filter>
+        <data name='genes' format='txt' label='${tool.name} on ${on_string} ${accession_code} genes.tsv (${expression_data_params.matrix_type.value_label})'>
+          <filter>expression_data_params['get_expression_data']</filter>
+        </data>
+        <data name='marker_genes' format='tsv' label='${tool.name} on ${on_string} ${accession_code} ${accession_code}.marker_genes_${expression_data_params.markers_cell_grouping}.tsv'>
+          <filter>expression_data_params['get_expression_data']</filter>
+        </data>
+        <data name='sdrf' format='txt' label='${tool.name} on ${on_string} ${accession_code} sdrf.txt' >
+            <filter>metadata_params['get_metadata']</filter>
         </data>
-        <data name="idf" format="txt" from_work_dir="idf.txt" label="${tool.name} on ${on_string} ${accession_code} idf.txt (${matrix_type.value_label})">
-            <filter>get_idf</filter>
+        <data name='condensed_sdrf' format='txt' label='${tool.name} on ${on_string} ${accession_code} condensed-sdrf.tsv' >
+            <filter>metadata_params['get_metadata']</filter>
+        </data>
+        <data name='idf' format='txt' label='${tool.name} on ${on_string} ${accession_code} idf.txt'>
+            <filter>metadata_params['get_metadata']</filter>
+        </data>
+        <data name='exp_design' format='txt' label='${tool.name} on ${on_string} ${accession_code} experiment_design.tsv'>
+            <filter>metadata_params['get_metadata']</filter>
         </data>
-        <data name="marker_genes" from_work_dir="marker_genes_${number_of_clusters}.tsv" format="txt"  >
-            <filter>get_marker_genes</filter>
-        </data>
-        <data name="exp_design" from_work_dir="exp_design.tsv" format="txt"  >
-            <filter>get_exp_design</filter>
+        <collection name='imported_classifiers' type='list' format="rdata" label='Collection of imported classifiers'>
+            <discover_datasets pattern='__name__' directory='imported_classifiers' />
+            <filter>classifier_params['get_classifiers']</filter>
+        </collection>
+        <collection name='imported_sdrfs' type='list' label='Collection of imported SDRF files'>
+            <discover_datasets pattern='__name_and_ext__' directory='imported_SDRFs' />
+            <filter>classifier_params['get_classifiers']</filter>
+          </collection>
+          <data name='tool_perf_table' from_work_dir="tool_perf_pvals.tsv" format='tsv' label='Tool performance table'>
+            <filter>classifier_params['get_classifiers']</filter>
         </data>
     </outputs>
+    <tests>
+      <test>
+        <param name="expression_data_params|get_expression_data" value="true" />
+        <param name="metadata_params|get_metadata" value="true" />
+        <param name="classifier_params|get_classifiers" value="true" />
+        <param name="accession_code" value="E-MTAB-7249" />
+        <param name="expression_data_params|matrix_type" value="CPM" />
+        <param name="classifier_params|tool" value="scpred" />
+        <param name="classifier_params|classifier_accession_code" value="E-MTAB-7249" />
+        <output name="expr_mtx">
+          <assert_contents>
+            <has_line_matching expression="%%MatrixMarket.*" />
+         </assert_contents>
+        </output>
+        <output name="barcodes">
+          <assert_contents>
+             <has_line_matching expression="ERR.*" />
+          </assert_contents>
+        </output>
+        <output name="genes">
+          <assert_contents>
+            <has_line_matching expression="ENSG000000.*" />
+          </assert_contents>
+        </output>
+        <output name="marker_genes">
+          <assert_contents>
+            <has_text text="pvals_adj" />
+          </assert_contents>
+        </output>
+        <output name="sdrf">
+          <assert_contents>
+            <has_text text="Characteristics[organism]" />
+          </assert_contents>
+        </output>
+        <output name="condensed_sdrf">
+          <assert_contents>
+            <has_text text="characteristic" />
+          </assert_contents>
+        </output>
+        <output name="idf">
+          <assert_contents>
+            <has_text text="Comment[Submitted Name]" />
+          </assert_contents>
+        </output>
+        <output name="exp_design">
+          <assert_contents>
+            <has_text text="Sample Characteristic[organism]" />
+          </assert_contents>
+        </output>
+        <output name="tool_perf_table">
+          <assert_contents>
+            <has_text text="Tool" />
+          </assert_contents>
+        </output>
+        <output_collection name="imported_classifiers" type="list">
+          <element name="E-MTAB-7249_scpred.rds">
+            <assert_contents>
+              <has_size value="976000" delta="500000" />
+            </assert_contents>
+          </element>
+        </output_collection>
+        <output_collection name="imported_sdrfs" type="list">
+          <element name="E-MTAB-7249.condensed-sdrf.tsv">
+            <assert_contents>
+              <has_text text="characteristic" />
+            </assert_contents>
+          </element>
+        </output_collection>
+      </test>
+    </tests>
     <help><![CDATA[
 =================================================================================
 Gene expression analysis in single cells across species and biological conditions
@@ -81,7 +181,7 @@
 Single Cell Expression Atlas supports research in single cell transcriptomics.
 The Atlas annotates publicly available single cell RNA-Seq experiments with
 ontology identifiers and re-analyses them using standardised pipelines available
-through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of
+through iRAP, our RNA-Seq analysis toolkit. The browser enables visualisation of
 clusters of cells, their annotations and supports searches for gene expression
 within and across studies.
 
@@ -146,8 +246,14 @@
 :Marker gene file (txt):
   File containing information on marker genes that differentiate cell types present in the sequencing experiment. 
 
+:Classifiers (collection): 
+  Collection of pre-trained classifiers for specified tool/dataset combination. 
+
+:SDRF files for classifiers (collection):
+  Collection of SDRF files for imported classifiers (convenient output for downstream processes)
+
 @HELP@
 @VERSION_HISTORY@
     ]]></help>
-    <expand macro="citations" />
+    <expand macro='citations' />
 </tool>