comparison filter.xml @ 14:eb36554fd6f9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/scanpy/ commit 91121b1e72696f17478dae383badaa71e9f96dbb
author iuc
date Sat, 14 Sep 2024 12:37:46 +0000
parents e299752da98e
children 2a55e0dae43a
comparison
equal deleted inserted replaced
13:e299752da98e 14:eb36554fd6f9
1 <tool id="scanpy_filter" name="Filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@"> 1 <tool id="scanpy_filter" name="Scanpy filter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>with scanpy</description> 2 <description>mark and subsample</description>
3 <macros> 3 <macros>
4 <import>macros.xml</import> 4 <import>macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="bio_tools"/> 6 <expand macro="bio_tools"/>
7 <expand macro="requirements"/> 7 <expand macro="requirements">
8 <requirement type="package" version="0.2.3">scrublet</requirement>
9 <requirement type="package" version="0.1.4">scikit-misc</requirement>
10 </expand>
8 <expand macro="version_command"/> 11 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[ 12 <command detect_errors="exit_code"><![CDATA[
10 @CMD@ 13 @CMD@
11 ]]></command> 14 ]]></command>
12 <configfiles> 15 <configfiles>
13 <configfile name="script_file"><![CDATA[ 16 <configfile name="script_file"><![CDATA[
14 @CMD_imports@ 17 @CMD_IMPORTS@
15 @CMD_read_inputs@ 18 @CMD_READ_INPUTS@
16 19
17 #if $method.method == 'pp.filter_cells' 20 #if $method.method == 'pp.filter_cells'
18 sc.pp.filter_cells( 21 sc.pp.filter_cells(
19 adata, 22 adata,
20 #if $method.filter.filter == 'min_counts' 23 #if $method.filter.filter == 'min_counts'
26 #else if $method.filter.filter == 'max_genes' 29 #else if $method.filter.filter == 'max_genes'
27 max_genes=$method.filter.max_genes, 30 max_genes=$method.filter.max_genes,
28 #end if 31 #end if
29 copy=False) 32 copy=False)
30 33
34 @CMD_ANNDATA_WRITE_OUTPUTS@
35
31 #else if $method.method == 'pp.filter_genes' 36 #else if $method.method == 'pp.filter_genes'
32 sc.pp.filter_genes( 37 sc.pp.filter_genes(
33 adata, 38 adata,
34 #if $method.filter.filter == 'min_counts' 39 #if $method.filter.filter == 'min_counts'
35 min_counts=$method.filter.min_counts, 40 min_counts=$method.filter.min_counts,
40 #else if $method.filter.filter == 'max_cells' 45 #else if $method.filter.filter == 'max_cells'
41 max_cells=$method.filter.max_cells, 46 max_cells=$method.filter.max_cells,
42 #end if 47 #end if
43 copy=False) 48 copy=False)
44 49
50 @CMD_ANNDATA_WRITE_OUTPUTS@
51
45 #else if $method.method == 'tl.filter_rank_genes_groups' 52 #else if $method.method == 'tl.filter_rank_genes_groups'
46 sc.tl.filter_rank_genes_groups( 53 sc.tl.filter_rank_genes_groups(
47 adata, 54 adata,
48 #if $method.key 55 #if $method.key
49 key='$method.key', 56 key='$method.key',
53 #end if 60 #end if
54 use_raw=$method.use_raw, 61 use_raw=$method.use_raw,
55 key_added='$method.key_added', 62 key_added='$method.key_added',
56 min_in_group_fraction=$method.min_in_group_fraction, 63 min_in_group_fraction=$method.min_in_group_fraction,
57 max_out_group_fraction=$method.max_out_group_fraction, 64 max_out_group_fraction=$method.max_out_group_fraction,
58 min_fold_change=$method.min_fold_change) 65 min_fold_change=$method.min_fold_change,
66 compare_abs=$method.compare_abs)
67
68 # Temporary fix for Issue reported here: https://github.com/scverse/anndata/issues/726
69 # Check and convert elements in 'rank_genes_groups_filtered' to strings
70 if 'rank_genes_groups_filtered' in adata.uns:
71 for key, value in adata.uns['rank_genes_groups_filtered'].items():
72 if not isinstance(value, str):
73 adata.uns['rank_genes_groups_filtered'][key] = str(value)
74
75 @CMD_ANNDATA_WRITE_OUTPUTS@
59 76
60 #else if $method.method == "pp.highly_variable_genes" 77 #else if $method.method == "pp.highly_variable_genes"
61 sc.pp.highly_variable_genes( 78 sc.pp.highly_variable_genes(
62 adata=adata, 79 adata=adata,
63 flavor='$method.flavor.flavor', 80 flavor='$method.flavor.flavor',
64 #if $method.flavor.flavor == 'seurat' 81 #if $method.flavor.flavor == 'seurat':
65 min_mean=$method.flavor.min_mean, 82 min_mean=$method.flavor.min_mean,
66 max_mean=$method.flavor.max_mean, 83 max_mean=$method.flavor.max_mean,
67 min_disp=$method.flavor.min_disp, 84 min_disp=$method.flavor.min_disp,
68 #if str($method.flavor.max_disp) != '' 85 #if str($method.flavor.max_disp) != ''
69 max_disp=$method.flavor.max_disp, 86 max_disp=$method.flavor.max_disp,
70 #end if 87 #end if
71 #else if $method.flavor.flavor == 'cell_ranger' 88 #else if $method.flavor.flavor == 'cell_ranger':
89 n_top_genes=$method.flavor.n_top_genes,
90 #else if $method.flavor.flavor == 'seurat_v3':
91 n_top_genes=$method.flavor.n_top_genes,
92 span=$method.flavor.span,
93 #else if $method.flavor.flavor == 'seurat_v3_paper':
72 n_top_genes=$method.flavor.n_top_genes, 94 n_top_genes=$method.flavor.n_top_genes,
73 #end if 95 #end if
74 n_bins=$method.n_bins, 96 n_bins=$method.n_bins,
75 subset=$method.subset, 97 subset=$method.subset,
98 #if $method.layer != ''
99 layer='$method.layer',
100 #end if
101 #if $method.batch_key != ''
102 batch_key='$method.batch_key',
103 #end if
76 inplace=True) 104 inplace=True)
105
106 @CMD_ANNDATA_WRITE_OUTPUTS@
77 107
78 #else if $method.method == 'pp.subsample' 108 #else if $method.method == 'pp.subsample'
79 sc.pp.subsample( 109 sc.pp.subsample(
80 data=adata, 110 data=adata,
81 #if $method.type.type == 'fraction' 111 #if $method.type.type == 'fraction'
84 n_obs=$method.type.n_obs, 114 n_obs=$method.type.n_obs,
85 #end if 115 #end if
86 random_state=$method.random_state, 116 random_state=$method.random_state,
87 copy=False) 117 copy=False)
88 118
119 @CMD_ANNDATA_WRITE_OUTPUTS@
120
89 #else if $method.method == "pp.downsample_counts" 121 #else if $method.method == "pp.downsample_counts"
122 #if str($method.counts_per_cell) != ''
123 print("Sum of counts for the first cell before:", adata.X[0, :].sum())
124 print("Sum of counts for the last cell before:", adata.X[adata.X.shape[0]-1, :].sum())
125 #else if str($method.total_counts) != ''
126 print("Sum of total counts before:", adata.X.sum())
127 #end if
128
90 sc.pp.downsample_counts( 129 sc.pp.downsample_counts(
91 adata=adata, 130 adata=adata,
92 #if str($method.counts_per_cell) != '' 131 #if str($method.counts_per_cell) != ''
93 counts_per_cell=$method.counts_per_cell, 132 counts_per_cell=$method.counts_per_cell,
94 #end if 133 #end if
97 #end if 136 #end if
98 random_state=$method.random_state, 137 random_state=$method.random_state,
99 replace=$method.replace, 138 replace=$method.replace,
100 copy=False) 139 copy=False)
101 140
141 #if str($method.counts_per_cell) != ''
142 print("Sum of counts for the first cell after:", adata.X[0, :].sum())
143 print("Sum of counts for the last cell after:", adata.X[adata.X.shape[0]-1, :].sum())
144 #else if str($method.total_counts) != ''
145 print("Sum of total counts after:", adata.X.sum())
146 #end if
147
148 @CMD_ANNDATA_WRITE_OUTPUTS@
149
102 #else if $method.method == "filter_marker" 150 #else if $method.method == "filter_marker"
103 151
104 #if $method.layer_selection.use_raw == 'False': 152 #if $method.layer_selection.use_raw == 'False':
105 adata.X = adata.layers['$method.layer_selection.layer'] 153 adata.X = adata.layers['$method.layer_selection.layer']
106 #end if 154 #end if
107 155
108 def check_marker(adata, group, gene, thresh_mean, thresh_frac, groupby): 156 def check_marker(adata, group, gene, thresh_mean, thresh_frac, groupby):
109 filtered_data = adata[adata.obs[groupby] == group, adata.var_names == gene] 157 filtered_data = adata[adata.obs[groupby] == group, adata.var_names == gene]
110 mean_expression = np.mean(filtered_data.X) 158 mean_expression = np.mean(filtered_data.X)
111 frac_cell_mean_expression = len(filtered_data.X[filtered_data.X > mean_expression]) / filtered_data.n_obs 159 frac_cell_mean_expression = len(filtered_data.X[filtered_data.X > mean_expression]) / filtered_data.n_obs
113 return(True) 161 return(True)
114 return(False) 162 return(False)
115 163
116 header='infer' 164 header='infer'
117 165
118 #if $method.header == 'not_included': 166 #if $method.header == 'not_included':
119 header=None 167 header=None
120 #end if 168 #end if
121 169
122 marker_list={key: list(value.values()) for key, value in pd.read_csv('$method.markerfile', sep='\t', index_col=0, header=header).to_dict(orient='index').items()} 170 marker_list={key: list(value.values()) for key, value in pd.read_csv('$method.markerfile', sep='\t', index_col=0, header=header).to_dict(orient='index').items()}
123 171
124 for key, value in marker_list.items(): 172 for key, value in marker_list.items():
125 marker_list[key] = [x for x in value if check_marker(adata, key, x, $method.thresh_mean, $method.thresh_frac, '$method.groupby')] 173 marker_list[key] = [x for x in value if check_marker(adata, key, x, $method.thresh_mean, $method.thresh_frac, '$method.groupby')]
131 for key, value in marker_list.items(): 179 for key, value in marker_list.items():
132 marker_list[key] = value + [''] * (max_len - len(value)) 180 marker_list[key] = value + [''] * (max_len - len(value))
133 181
134 df = pd.DataFrame(marker_list).T 182 df = pd.DataFrame(marker_list).T
135 df.to_csv('marker.tsv', sep='\t', index=True) 183 df.to_csv('marker.tsv', sep='\t', index=True)
184
185 #else if $method.method == "pp.scrublet"
186 sc.pp.scrublet(
187 adata,
188 #if $method.batch_key != ''
189 batch_key='$method.batch_key',
190 #end if
191 sim_doublet_ratio=$method.sim_doublet_ratio,
192 expected_doublet_rate=$method.expected_doublet_rate,
193 stdev_doublet_rate=$method.stdev_doublet_rate,
194 synthetic_doublet_umi_subsampling=$method.synthetic_doublet_umi_subsampling,
195 knn_dist_metric='$method.knn_dist_metric',
196 normalize_variance=$method.normalize_variance,
197 log_transform=$method.log_transform,
198 mean_center=$method.mean_center,
199 n_prin_comps=$method.n_prin_comps,
200 use_approx_neighbors=$method.use_approx_neighbors,
201 get_doublet_neighbor_parents=$method.get_doublet_neighbor_parents,
202 #if str($method.n_neighbors) != ''
203 n_neighbors=$method.n_neighbors,
204 #end if
205 #if str($method.threshold) != ''
206 threshold=$method.threshold,
207 #end if
208 random_state=$method.random_state)
209
210 @CMD_ANNDATA_WRITE_OUTPUTS@
136 #end if 211 #end if
137 212 ]]>
138 @CMD_anndata_write_outputs@ 213 </configfile>
139 ]]></configfile>
140 </configfiles> 214 </configfiles>
141 <inputs> 215 <inputs>
142 <expand macro="inputs_anndata"/> 216 <expand macro="inputs_anndata"/>
143 <conditional name="method"> 217 <conditional name="method">
144 <param argument="method" type="select" label="Method used for filtering"> 218 <param argument="method" type="select" label="Method used for filtering">
147 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option> 221 <option value="tl.filter_rank_genes_groups">Filters out genes based on fold change and fraction of genes expressing the gene within and outside the groupby categories, using 'tl.filter_rank_genes_groups'</option>
148 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option> 222 <option value="pp.highly_variable_genes">Annotate (and filter) highly variable genes, using 'pp.highly_variable_genes'</option>
149 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option> 223 <option value="pp.subsample">Subsample to a fraction of the number of observations, using 'pp.subsample'</option>
150 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option> 224 <option value="pp.downsample_counts">Downsample counts from count matrix, using 'pp.downsample_counts'</option>
151 <option value="filter_marker">Filter markers from count matrix and marker list</option> 225 <option value="filter_marker">Filter markers from count matrix and marker list</option>
226 <option value="pp.scrublet">Predict doublets using 'pp.scrublet'</option>
152 </param> 227 </param>
153 <when value="pp.filter_cells"> 228 <when value="pp.filter_cells">
154 <conditional name="filter"> 229 <conditional name="filter">
155 <param argument="filter" type="select" label="Filter"> 230 <param argument="filter" type="select" label="Filter">
156 <option value="min_counts">Minimum number of counts</option> 231 <option value="min_counts" selected="true">Minimum number of counts</option>
157 <option value="max_counts">Maximum number of counts</option> 232 <option value="max_counts">Maximum number of counts</option>
158 <option value="min_genes">Minimum number of genes expressed</option> 233 <option value="min_genes">Minimum number of genes expressed</option>
159 <option value="max_genes">Maximum number of genes expressed</option> 234 <option value="max_genes">Maximum number of genes expressed</option>
160 </param> 235 </param>
161 <when value="min_counts"> 236 <when value="min_counts">
162 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering" help=""/> 237 <param argument="min_counts" type="integer" min="0" value="" label="Minimum number of counts required for a cell to pass filtering"/>
163 </when> 238 </when>
164 <when value="max_counts"> 239 <when value="max_counts">
165 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering" help=""/> 240 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a cell to pass filtering"/>
166 </when> 241 </when>
167 <when value="min_genes"> 242 <when value="min_genes">
168 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering" help=""/> 243 <param argument="min_genes" type="integer" min="0" value="" label="Minimum number of genes expressed required for a cell to pass filtering"/>
169 </when> 244 </when>
170 <when value="max_genes"> 245 <when value="max_genes">
171 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering" help=""/> 246 <param argument="max_genes" type="integer" min="0" value="" label="Maximum number of genes expressed required for a cell to pass filtering"/>
172 </when> 247 </when>
173 </conditional> 248 </conditional>
174 </when> 249 </when>
175 <when value="pp.filter_genes"> 250 <when value="pp.filter_genes">
176 <conditional name="filter"> 251 <conditional name="filter">
177 <param argument="filter" type="select" label="Filter"> 252 <param argument="filter" type="select" label="Filter">
178 <option value="min_counts">Minimum number of counts</option> 253 <option value="min_counts" selected="true">Minimum number of counts</option>
179 <option value="max_counts">Maximum number of counts</option> 254 <option value="max_counts">Maximum number of counts</option>
180 <option value="min_cells">Minimum number of cells expressed</option> 255 <option value="min_cells">Minimum number of cells expressed</option>
181 <option value="max_cells">Maximum number of cells expressed</option> 256 <option value="max_cells">Maximum number of cells expressed</option>
182 </param> 257 </param>
183 <when value="min_counts"> 258 <when value="min_counts">
186 <when value="max_counts"> 261 <when value="max_counts">
187 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/> 262 <param argument="max_counts" type="integer" min="0" value="" label="Maximum number of counts required for a gene to pass filtering"/>
188 </when> 263 </when>
189 <when value="min_cells"> 264 <when value="min_cells">
190 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/> 265 <param argument="min_cells" type="integer" min="0" value="" label="Minimum number of cells expressed required for a gene to pass filtering"/>
191 </when> 266 </when>
192 <when value="max_cells"> 267 <when value="max_cells">
193 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/> 268 <param argument="max_cells" type="integer" min="0" value="" label="Maximum number of cells expressed required for a gene to pass filtering"/>
194 </when> 269 </when>
195 </conditional> 270 </conditional>
196 </when> 271 </when>
197 <when value="tl.filter_rank_genes_groups"> 272 <when value="tl.filter_rank_genes_groups">
198 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored"> 273 <param argument="key" type="text" optional="true" label="Key in adata.uns where the rank_genes_groups output is stored">
199 <expand macro="sanitize_query" /> 274 <expand macro="sanitize_query"/>
200 </param> 275 </param>
201 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider"> 276 <param argument="groupby" type="text" optional="true" label="The key of the observations grouping to consider">
202 <expand macro="sanitize_query" /> 277 <expand macro="sanitize_query"/>
203 </param> 278 </param>
204 <expand macro="param_use_raw"/> 279 <expand macro="param_use_raw"/>
205 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values"> 280 <param argument="key_added" type="text" value="rank_genes_groups_filtered" label="Key that will contain new values">
206 <expand macro="sanitize_query" /> 281 <expand macro="sanitize_query"/>
207 </param> 282 </param>
208 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/> 283 <param argument="min_in_group_fraction" type="float" min="0" max="1" value="0.25" label="Minimum fraction of cells expressing the gene within the categories"/>
209 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/> 284 <param argument="max_out_group_fraction" type="float" min="0" max="1" value="0.5" label="Maximum fraction of cells expressing the gene outside the categories"/>
210 <param argument="min_fold_change" type="integer" value="2" label="Minimum fold change"/> 285 <param argument="min_fold_change" type="integer" value="1" label="Minimum fold change"/>
286 <param argument="compare_abs" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If selected, compare absolute values of log fold change with min_fold_change"/>
211 </when> 287 </when>
212 <when value="pp.highly_variable_genes"> 288 <when value="pp.highly_variable_genes">
213 <conditional name='flavor'> 289 <conditional name='flavor'>
214 <param argument="flavor" type="select" label="Flavor for computing normalized dispersion"> 290 <param argument="flavor" type="select" label="Choose the flavor for identifying highly variable genes" help="Expects logarithmized data, except when flavor='seurat_v3'/'seurat_v3_paper', in which case count data is expected">
215 <option value="seurat">Seurat</option> 291 <option value="seurat" selected="true">Seurat</option>
216 <option value="cell_ranger">Cell Ranger</option> 292 <option value="cell_ranger">Cell Ranger</option>
293 <option value="seurat_v3">Seurat v3</option>
294 <option value="seurat_v3_paper">Seurat v3 (paper)</option>
217 </param> 295 </param>
218 <when value="seurat"> 296 <when value="seurat">
219 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/> 297 <param argument="min_mean" type="float" value="0.0125" label="Minimal mean cutoff"/>
220 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/> 298 <param argument="max_mean" type="float" value="3" label="Maximal mean cutoff"/>
221 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/> 299 <param argument="min_disp" type="float" value="0.5" label="Minimal normalized dispersion cutoff"/>
222 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/> 300 <param argument="max_disp" type="float" value="" optional="true" label="Maximal normalized dispersion cutoff"/>
223 </when> 301 </when>
224 <when value="cell_ranger"> 302 <when value="cell_ranger">
225 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/> 303 <param argument="n_top_genes" type="integer" value="" label="Number of highly-variable genes to keep"/>
226 </when> 304 </when>
305 <when value="seurat_v3">
306 <param argument="n_top_genes" type="integer" value="" optional="false" label="Number of highly-variable genes to keep"/>
307 <param argument="span" type="float" value="0.3" label="The fraction of the data (cells) used when estimating the variance in the loess model fit"/>
308 </when>
309 <when value="seurat_v3_paper">
310 <param argument="n_top_genes" type="integer" value="" optional="false" label="Number of highly-variable genes to keep"/>
311 </when>
227 </conditional> 312 </conditional>
228 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/> 313 <param argument="n_bins" type="integer" value="20" label="Number of bins for binning the mean gene expression" help="Normalization is done with respect to each bin. If just a single gene falls into a bin, the normalized dispersion is artificially set to 1"/>
229 <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/> 314 <param argument="subset" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Inplace subset to highly-variable genes?" help="Otherwise it merely indicates highly variable genes."/>
315 <expand macro="param_layer"/>
316 <param argument="batch_key" type="text" value="" label="Specify the batch key" help="If specified, highly-variable genes are selected within each batch separately and merged.">
317 <expand macro="sanitize_query"/>
318 </param>
230 </when> 319 </when>
231 <when value="pp.subsample"> 320 <when value="pp.subsample">
232 <conditional name="type"> 321 <conditional name="type">
233 <param name="type" type="select" label="Type of subsampling"> 322 <param name="type" type="select" label="Type of subsampling">
234 <option value="fraction">By fraction</option> 323 <option value="fraction" selected="true">By fraction</option>
235 <option value="n_obs">By number of observations</option> 324 <option value="n_obs">By number of observations</option>
236 </param> 325 </param>
237 <when value="fraction"> 326 <when value="fraction">
238 <param argument="fraction" type="float" value="" label="Subsample to this 'fraction' of the number of observations"/> 327 <param argument="fraction" type="float" min="0" value="" label="Subsample to this 'fraction' of the number of observations"/>
239 </when> 328 </when>
240 <when value="n_obs"> 329 <when value="n_obs">
241 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/> 330 <param argument="n_obs" type="integer" min="0" value="" label="Subsample to this number of observations"/>
242 </when> 331 </when>
243 </conditional> 332 </conditional>
249 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/> 338 <param argument="random_state" type="integer" value="0" label="Random seed to change subsampling"/>
250 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/> 339 <param argument="replace" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sample the counts with replacement?"/>
251 </when> 340 </when>
252 <when value="filter_marker"> 341 <when value="filter_marker">
253 <param argument="markerfile" type="data" format="tabular" label="List of markers" help="This should be a tsv where row = group (e.g. celltypes) and columns = markers."></param> 342 <param argument="markerfile" type="data" format="tabular" label="List of markers" help="This should be a tsv where row = group (e.g. celltypes) and columns = markers."></param>
254 <param name="header" type="select" label="Header in the list of markers?"> 343 <param name="header" type="boolean" truevalue="included" falsevalue="not_included" checked="true" label="Header is included in the list of markers?"/>
255 <option value="included">Header incldued</option>
256 <option value="not_included">Header not included</option>
257 </param>
258 <param argument="thresh_mean" type="float" min="0.0" value="1.0" label="Minimal average count of all cells of a group (e.g., celltype) for a particular marker" help="Increasing the threshold will result in a smaller marker set."/> 344 <param argument="thresh_mean" type="float" min="0.0" value="1.0" label="Minimal average count of all cells of a group (e.g., celltype) for a particular marker" help="Increasing the threshold will result in a smaller marker set."/>
259 <param argument="thresh_frac" type="float" min="0.0" max="1.0" value="0.1" label="Minimal fraction of cells that have a higher count than the average count of all cells of the group for the marker" help="Increasing this threshold might remove marker outliers."/> 345 <param argument="thresh_frac" type="float" min="0.0" max="1.0" value="0.1" label="Minimal fraction of cells that have a higher count than the average count of all cells of the group for the marker" help="Increasing this threshold might remove marker outliers."/>
260 <conditional name="layer_selection"> 346 <conditional name="layer_selection">
261 <param name="use_raw" type="select" label="Use .X of adata to perform the filtering" help=""> 347 <param name="use_raw" type="select" label="Use .X of adata to perform the filtering">
262 <option value="True">Yes</option> 348 <option value="True" selected="true">Yes</option>
263 <option value="False">No</option> 349 <option value="False">No</option>
264 </param> 350 </param>
265 <when value="False"> 351 <when value="False">
266 <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to filter" help="If layers specified then use adata.layers[layer]."/> 352 <param argument="layer" type="text" value="" label="Key from adata.layers whose value will be used to filter" help="If layers specified then use adata.layers[layer]."/>
267 </when> 353 </when>
268 <when value="True"/> 354 <when value="True"/>
269 </conditional> 355 </conditional>
270 <param argument="groupby" type="text" value="" label="The key of the observation grouping to consider (e.g., celltype)" help=""> 356 <param argument="groupby" type="text" value="" label="The key of the observation grouping to consider (e.g., celltype)">
357 <expand macro="sanitize_query"/>
358 </param>
359 </when>
360 <when value="pp.scrublet">
361 <param argument="batch_key" type="text" value="" optional="true" label="Batch key for the concatenate">
271 <expand macro="sanitize_query" /> 362 <expand macro="sanitize_query" />
272 </param> 363 </param>
364 <param argument="sim_doublet_ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed transcriptomes"/>
365 <param argument="expected_doublet_rate" type="float" value="0.05" label="The estimated doublet rate for the experiment"/>
366 <param argument="stdev_doublet_rate" type="float" value="0.02" label="Uncertainty in the expected doublet rate"/>
367 <param argument="synthetic_doublet_umi_subsampling" type="float" value="1.0" label="Rate for sampling UMIs when creating synthetic doublets" help="If 1.0, each doublet is created by simply adding the UMI counts from two randomly sampled observed transcriptomes. For values less than 1, the UMI counts are added and then randomly sampled at the specified rate."/>
368 <param name="knn_dist_metric" type="select" label="Distance metric used when finding nearest neighbors">
369 <expand macro="distance_metric_options"/>
370 </param>
371 <param argument="normalize_variance" type="boolean" truevalue="True" falsevalue="False" checked="true" label="normalize the data such that each gene has a variance of 1"/>
372 <param argument="log_transform" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use log1p() to log-transform the data prior to PCA"/>
373 <param argument="mean_center" type="boolean" truevalue="True" falsevalue="False" checked="true" label="If True, center the data such that each gene has a mean of 0"/>
374 <param argument="n_prin_comps" type="integer" value="30" label="Number of principal components used to embed the transcriptomes prior to k-nearest-neighbor graph construction"/>
375 <param argument="use_approx_neighbors" type="boolean" truevalue="True" falsevalue="None" checked="false" label="Use approximate nearest neighbor method (annoy) for the KNN classifier"/>
376 <param argument="get_doublet_neighbor_parents" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If True, return (in .uns) the parent transcriptomes that generated the doublet neighbors of each observed transcriptome" help="This information can be used to infer the cell states that generated a given doublet state."/>
377 <param argument="n_neighbors" type="integer" value="" optional="true" label="Number of neighbors used to construct the KNN graph of observed transcriptomes and simulated doublets"/>
378 <param argument="threshold" type="float" value="" optional="true" label="Doublet score threshold for calling a transcriptome a doublet" help="If None, this is set automatically"/>
379 <param name="random_state" type="integer" value="0" label="Initial state for doublet simulation and nearest neighbors"/>
273 </when> 380 </when>
274 </conditional> 381 </conditional>
275 <expand macro="inputs_common_advanced"/> 382 <expand macro="inputs_common_advanced"/>
276 </inputs> 383 </inputs>
277 <outputs> 384 <outputs>
278 <expand macro="anndata_outputs"/> 385 <expand macro="anndata_outputs">
386 <filter>method['method'] != 'filter_marker'</filter>
387 </expand>
279 <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers"> 388 <data name="marker_out" format="tabular" from_work_dir="marker.tsv" label="${tool.name} on ${on_string}: Markers">
280 <filter>method['method'] == 'filter_marker'</filter> 389 <filter>method['method'] == 'filter_marker'</filter>
281 </data> 390 </data>
282 </outputs> 391 </outputs>
283 <tests> 392 <tests>
284 <test expect_num_outputs="2"> 393 <!-- test 1 -->
285 <!-- test 1 --> 394 <test expect_num_outputs="2">
286 <param name="adata" value="krumsiek11.h5ad" /> 395 <param name="adata" value="krumsiek11.h5ad"/>
287 <conditional name="method"> 396 <conditional name="method">
288 <param name="method" value="pp.filter_cells"/> 397 <param name="method" value="pp.filter_cells"/>
289 <conditional name="filter"> 398 <conditional name="filter">
290 <param name="filter" value="min_counts"/> 399 <param name="filter" value="min_counts"/>
291 <param name="min_counts" value="3"/> 400 <param name="min_counts" value="3"/>
292 </conditional> 401 </conditional>
293 </conditional> 402 </conditional>
403 <section name="advanced_common">
404 <param name="show_log" value="true"/>
405 </section>
294 <assert_stdout> 406 <assert_stdout>
295 <has_text_matching expression="336 × 11"/> 407 <has_text_matching expression="336 × 11"/>
296 </assert_stdout> 408 </assert_stdout>
297 <section name="advanced_common">
298 <param name="show_log" value="true" />
299 </section>
300 <output name="hidden_output"> 409 <output name="hidden_output">
301 <assert_contents> 410 <assert_contents>
302 <has_text_matching expression="sc.pp.filter_cells"/> 411 <has_text_matching expression="sc.pp.filter_cells"/>
303 <has_text_matching expression="min_counts=3"/> 412 <has_text_matching expression="min_counts=3"/>
304 </assert_contents> 413 </assert_contents>
305 </output> 414 </output>
306 <output name="anndata_out" file="pp.filter_cells.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> 415 <output name="anndata_out" ftype="h5ad">
307 </test> 416 <assert_contents>
308 <test expect_num_outputs="2"> 417 <has_h5_keys keys="obs/cell_type"/>
309 <!-- test 2 --> 418 </assert_contents>
310 <param name="adata" value="krumsiek11.h5ad" /> 419 </output>
420 </test>
421
422 <!-- test 2 -->
423 <test expect_num_outputs="2">
424 <param name="adata" value="krumsiek11.h5ad"/>
311 <conditional name="method"> 425 <conditional name="method">
312 <param name="method" value="pp.filter_cells"/> 426 <param name="method" value="pp.filter_cells"/>
313 <conditional name="filter"> 427 <conditional name="filter">
314 <param name="filter" value="max_genes"/> 428 <param name="filter" value="max_genes"/>
315 <param name="max_genes" value="100"/> 429 <param name="max_genes" value="10"/>
316 </conditional> 430 </conditional>
317 </conditional> 431 </conditional>
318 <section name="advanced_common"> 432 <section name="advanced_common">
319 <param name="show_log" value="true" /> 433 <param name="show_log" value="true"/>
320 </section> 434 </section>
435 <assert_stdout>
436 <has_text_matching expression="354 × 11"/>
437 </assert_stdout>
321 <output name="hidden_output"> 438 <output name="hidden_output">
322 <assert_contents> 439 <assert_contents>
323 <has_text_matching expression="sc.pp.filter_cells"/> 440 <has_text_matching expression="sc.pp.filter_cells"/>
324 <has_text_matching expression="adata"/> 441 <has_text_matching expression="adata"/>
325 <has_text_matching expression="max_genes=100"/> 442 <has_text_matching expression="max_genes=10"/>
326 </assert_contents> 443 </assert_contents>
327 </output> 444 </output>
328 <output name="anndata_out" file="pp.filter_cells.krumsiek11-max_genes.h5ad" ftype="h5ad" compare="sim_size"/> 445 <output name="anndata_out" ftype="h5ad">
329 </test> 446 <assert_contents>
330 <test expect_num_outputs="2"> 447 <has_h5_keys keys="obs/cell_type"/>
331 <!-- test 3 --> 448 </assert_contents>
332 <param name="adata" value="krumsiek11.h5ad" /> 449 </output>
450 </test>
451
452 <!-- test 3 -->
453 <test expect_num_outputs="2">
454 <param name="adata" value="krumsiek11.h5ad"/>
333 <conditional name="method"> 455 <conditional name="method">
334 <param name="method" value="pp.filter_genes"/> 456 <param name="method" value="pp.filter_genes"/>
335 <conditional name="filter"> 457 <conditional name="filter">
336 <param name="filter" value="min_counts"/> 458 <param name="filter" value="min_counts"/>
337 <param name="min_counts" value="3"/> 459 <param name="min_counts" value="100"/>
338 </conditional> 460 </conditional>
339 </conditional> 461 </conditional>
340 <section name="advanced_common"> 462 <section name="advanced_common">
341 <param name="show_log" value="true" /> 463 <param name="show_log" value="true"/>
342 </section> 464 </section>
465 <assert_stdout>
466 <has_text_matching expression="640 × 8"/>
467 </assert_stdout>
343 <output name="hidden_output"> 468 <output name="hidden_output">
344 <assert_contents> 469 <assert_contents>
345 <has_text_matching expression="sc.pp.filter_genes"/> 470 <has_text_matching expression="sc.pp.filter_genes"/>
346 <has_text_matching expression="min_counts=3"/> 471 <has_text_matching expression="min_counts=100"/>
347 </assert_contents> 472 </assert_contents>
348 </output> 473 </output>
349 <output name="anndata_out" file="pp.filter_genes.krumsiek11-min_counts.h5ad" ftype="h5ad" compare="sim_size"/> 474 <output name="anndata_out" ftype="h5ad">
475 <assert_contents>
476 <has_h5_keys keys="obs/cell_type"/>
477 </assert_contents>
478 </output>
350 </test> 479 </test>
351 480
352 <!-- test 4 --> 481 <!-- test 4 -->
353 <!-- Fails to write to anndata after tl.filter_rank_genes_groups 482 <!-- Fails to write to anndata after tl.filter_rank_genes_groups
354 Issue has been reported here: https://github.com/scverse/anndata/issues/726 483 Issue has been reported here: https://github.com/scverse/anndata/issues/726
355 The current fix is: del adata.uns['rank_genes_groups_filtered'] --> 484 The current fix is: del adata.uns['rank_genes_groups_filtered'] -->
356 <!--<test expect_num_outputs="2"> 485 <!-- The issue is fixed in the script here -->
357 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad" /> 486 <test expect_num_outputs="2">
487 <param name="adata" value="tl.rank_genes_groups.krumsiek11.h5ad"/>
358 <conditional name="method"> 488 <conditional name="method">
359 <param name="method" value="tl.filter_rank_genes_groups"/> 489 <param name="method" value="tl.filter_rank_genes_groups"/>
360 <param name="key" value="rank_genes_groups"/> 490 <param name="key" value="rank_genes_groups"/>
361 <param name="use_raw" value="False"/>
362 <param name="key_added" value="rank_genes_groups_filtered"/>
363 <param name="min_in_group_fraction" value="0.25"/>
364 <param name="max_out_group_fraction" value="0.5"/>
365 <param name="min_fold_change" value="3"/> 491 <param name="min_fold_change" value="3"/>
366 </conditional> 492 </conditional>
367 <section name="advanced_common"> 493 <section name="advanced_common">
368 <param name="show_log" value="true" /> 494 <param name="show_log" value="true"/>
369 </section> 495 </section>
370 <output name="hidden_output"> 496 <output name="hidden_output">
371 <assert_contents> 497 <assert_contents>
372 <has_text_matching expression="tl.filter_rank_genes_groups"/> 498 <has_text_matching expression="tl.filter_rank_genes_groups"/>
373 <has_text_matching expression="key='rank_genes_groups'"/> 499 <has_text_matching expression="key='rank_genes_groups'"/>
374 <has_text_matching expression="use_raw=False"/> 500 <has_text_matching expression="use_raw=False"/>
375 <has_text_matching expression="log=False"/>
376 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/> 501 <has_text_matching expression="key_added='rank_genes_groups_filtered'"/>
377 <has_text_matching expression="min_in_group_fraction=0.25"/> 502 <has_text_matching expression="min_in_group_fraction=0.25"/>
378 <has_text_matching expression="max_out_group_fraction=0.5"/> 503 <has_text_matching expression="max_out_group_fraction=0.5"/>
379 <has_text_matching expression="min_fold_change=3"/> 504 <has_text_matching expression="min_fold_change=3"/>
380 </assert_contents> 505 </assert_contents>
381 </output> 506 </output>
382 <output name="anndata_out" file="pp.filter_rank_genes_groups.h5ad" ftype="h5ad" compare="sim_size"/> 507 <output name="anndata_out" ftype="h5ad">
383 </test>--> 508 <assert_contents>
384 <test expect_num_outputs="2"> 509 <has_h5_keys keys="uns/rank_genes_groups_filtered"/>
385 <!-- test 5 --> 510 </assert_contents>
511 </output>
512 </test>
513
514 <!-- test 5 -->
515 <test expect_num_outputs="2">
386 <param name="adata" value="blobs.h5ad"/> 516 <param name="adata" value="blobs.h5ad"/>
387 <conditional name="method"> 517 <conditional name="method">
388 <param name="method" value="pp.highly_variable_genes"/> 518 <param name="method" value="pp.highly_variable_genes"/>
389 <conditional name="flavor"> 519 </conditional>
390 <param name="flavor" value="seurat"/> 520 <section name="advanced_common">
391 <param name="min_mean" value="0.0125"/> 521 <param name="show_log" value="true"/>
392 <param name="max_mean" value="3"/>
393 <param name="min_disp" value="0.5"/>
394 </conditional>
395 <param name="n_bins" value="20"/>
396 <param name="subset" value="false"/>
397 </conditional>
398 <section name="advanced_common">
399 <param name="show_log" value="true" />
400 </section> 522 </section>
401 <output name="hidden_output"> 523 <output name="hidden_output">
402 <assert_contents> 524 <assert_contents>
403 <has_text_matching expression="sc.pp.highly_variable_genes"/> 525 <has_text_matching expression="sc.pp.highly_variable_genes"/>
404 <has_text_matching expression="flavor='seurat'"/> 526 <has_text_matching expression="flavor='seurat'"/>
407 <has_text_matching expression="min_disp=0.5"/> 529 <has_text_matching expression="min_disp=0.5"/>
408 <has_text_matching expression="n_bins=20"/> 530 <has_text_matching expression="n_bins=20"/>
409 <has_text_matching expression="subset=False"/> 531 <has_text_matching expression="subset=False"/>
410 </assert_contents> 532 </assert_contents>
411 </output> 533 </output>
412 <output name="anndata_out" file="pp.highly_variable_genes.seurat.blobs.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.2"/> 534 <output name="anndata_out" ftype="h5ad">
413 </test> 535 <assert_contents>
414 <test expect_num_outputs="2"> 536 <has_h5_keys keys="var/highly_variable,var/means,var/dispersions,var/dispersions_norm"/>
415 <!-- test 6 --> 537 <has_h5_keys keys="uns/hvg"/>
416 <param name="adata" value="krumsiek11.h5ad" /> 538 </assert_contents>
539 </output>
540 </test>
541
542 <!-- test 6 -->
543 <test expect_num_outputs="2">
544 <param name="adata" value="krumsiek11.h5ad"/>
417 <conditional name="method"> 545 <conditional name="method">
418 <param name="method" value="pp.highly_variable_genes"/> 546 <param name="method" value="pp.highly_variable_genes"/>
419 <conditional name="flavor"> 547 <conditional name="flavor">
420 <param name="flavor" value="cell_ranger"/> 548 <param name="flavor" value="cell_ranger"/>
421 <param name="n_top_genes" value="2"/> 549 <param name="n_top_genes" value="2"/>
422 </conditional> 550 </conditional>
423 <param name="n_bins" value="20"/>
424 <param name="subset" value="true"/> 551 <param name="subset" value="true"/>
425 </conditional> 552 </conditional>
426 <section name="advanced_common"> 553 <section name="advanced_common">
427 <param name="show_log" value="true" /> 554 <param name="show_log" value="true"/>
428 </section> 555 </section>
429 <output name="hidden_output"> 556 <output name="hidden_output">
430 <assert_contents> 557 <assert_contents>
431 <has_text_matching expression="sc.pp.highly_variable_genes"/> 558 <has_text_matching expression="sc.pp.highly_variable_genes"/>
432 <has_text_matching expression="flavor='cell_ranger'"/> 559 <has_text_matching expression="flavor='cell_ranger'"/>
433 <has_text_matching expression="n_top_genes=2"/> 560 <has_text_matching expression="n_top_genes=2"/>
434 <has_text_matching expression="n_bins=20"/> 561 <has_text_matching expression="n_bins=20"/>
435 <has_text_matching expression="subset=True"/> 562 <has_text_matching expression="subset=True"/>
436 </assert_contents> 563 </assert_contents>
437 </output> 564 </output>
438 <output name="anndata_out" file="pp.highly_variable_genes.krumsiek11-cell_ranger.h5ad" ftype="h5ad" compare="sim_size" delta="100000" delta_frac="0.9"/> 565 <output name="anndata_out" ftype="h5ad">
439 </test> 566 <assert_contents>
440 <test expect_num_outputs="2"> 567 <has_h5_keys keys="var/highly_variable,var/means,var/dispersions,var/dispersions_norm"/>
441 <!-- test 7 --> 568 <has_h5_keys keys="uns/hvg"/>
442 <param name="adata" value="krumsiek11.h5ad" /> 569 </assert_contents>
570 </output>
571 </test>
572
573 <!-- test 7 -->
574 <test expect_num_outputs="2">
575 <param name="adata" value="krumsiek11.h5ad"/>
443 <conditional name="method"> 576 <conditional name="method">
444 <param name="method" value="pp.subsample"/> 577 <param name="method" value="pp.subsample"/>
445 <conditional name="type"> 578 <conditional name="type">
446 <param name="type" value="fraction" /> 579 <param name="type" value="fraction"/>
447 <param name="fraction" value="0.5"/> 580 <param name="fraction" value="0.5"/>
448 </conditional> 581 </conditional>
449 <param name="random_state" value="0"/> 582 </conditional>
450 </conditional> 583 <section name="advanced_common">
451 <section name="advanced_common"> 584 <param name="show_log" value="true"/>
452 <param name="show_log" value="true" /> 585 </section>
453 </section> 586 <assert_stdout>
587 <has_text_matching expression="320 × 11"/>
588 </assert_stdout>
454 <output name="hidden_output"> 589 <output name="hidden_output">
455 <assert_contents> 590 <assert_contents>
456 <has_text_matching expression="sc.pp.subsample"/> 591 <has_text_matching expression="sc.pp.subsample"/>
457 <has_text_matching expression="fraction=0.5"/> 592 <has_text_matching expression="fraction=0.5"/>
458 <has_text_matching expression="random_state=0"/> 593 <has_text_matching expression="random_state=0"/>
459 </assert_contents> 594 </assert_contents>
460 </output> 595 </output>
461 <output name="anndata_out" file="pp.subsample.krumsiek11_fraction.h5ad" ftype="h5ad" compare="sim_size"/> 596 <output name="anndata_out" ftype="h5ad">
462 </test> 597 <assert_contents>
463 <test expect_num_outputs="2"> 598 <has_h5_keys keys="obs/cell_type"/>
464 <!-- test 8 --> 599 </assert_contents>
465 <param name="adata" value="krumsiek11.h5ad" /> 600 </output>
601 </test>
602
603 <!-- test 8 -->
604 <test expect_num_outputs="2">
605 <param name="adata" value="krumsiek11.h5ad"/>
466 <conditional name="method"> 606 <conditional name="method">
467 <param name="method" value="pp.subsample"/> 607 <param name="method" value="pp.subsample"/>
468 <conditional name="type"> 608 <conditional name="type">
469 <param name="type" value="n_obs" /> 609 <param name="type" value="n_obs"/>
470 <param name="n_obs" value="10"/> 610 <param name="n_obs" value="10"/>
471 </conditional> 611 </conditional>
472 <param name="random_state" value="0"/> 612 </conditional>
473 </conditional> 613 <section name="advanced_common">
474 <section name="advanced_common"> 614 <param name="show_log" value="true"/>
475 <param name="show_log" value="true" /> 615 </section>
476 </section> 616 <assert_stdout>
617 <has_text_matching expression="10 × 11"/>
618 </assert_stdout>
477 <output name="hidden_output"> 619 <output name="hidden_output">
478 <assert_contents> 620 <assert_contents>
479 <has_text_matching expression="sc.pp.subsample"/> 621 <has_text_matching expression="sc.pp.subsample"/>
480 <has_text_matching expression="n_obs=10"/> 622 <has_text_matching expression="n_obs=10"/>
481 <has_text_matching expression="random_state=0"/> 623 <has_text_matching expression="random_state=0"/>
482 </assert_contents> 624 </assert_contents>
483 </output> 625 </output>
484 <output name="anndata_out" file="pp.subsample.krumsiek11_n_obs.h5ad" ftype="h5ad" compare="sim_size"/> 626 <output name="anndata_out" ftype="h5ad">
485 </test> 627 <assert_contents>
486 <test expect_num_outputs="2"> 628 <has_h5_keys keys="obs/cell_type"/>
487 <!-- test 9 --> 629 </assert_contents>
488 <param name="adata" value="random-randint.h5ad" /> 630 </output>
631 </test>
632
633 <!-- test 9 -->
634 <test expect_num_outputs="2">
635 <param name="adata" value="random-randint.h5ad"/>
489 <conditional name="method"> 636 <conditional name="method">
490 <param name="method" value="pp.downsample_counts"/> 637 <param name="method" value="pp.downsample_counts"/>
491 <param name="total_counts" value="20000"/> 638 <param name="total_counts" value="20000"/>
492 <param name="random_state" value="0"/> 639 </conditional>
493 <param name="replace" value="false"/> 640 <section name="advanced_common">
494 </conditional> 641 <param name="show_log" value="true"/>
495 <section name="advanced_common">
496 <param name="show_log" value="true" />
497 </section> 642 </section>
498 <output name="hidden_output"> 643 <output name="hidden_output">
499 <assert_contents> 644 <assert_contents>
500 <has_text_matching expression="sc.pp.downsample_counts"/> 645 <has_text_matching expression="sc.pp.downsample_counts"/>
501 <has_text_matching expression="total_counts=20000"/> 646 <has_text_matching expression="total_counts=20000"/>
502 <has_text_matching expression="random_state=0"/> 647 <has_text_matching expression="random_state=0"/>
503 <has_text_matching expression="replace=False"/> 648 <has_text_matching expression="replace=False"/>
504 </assert_contents> 649 <has_text_matching expression="Sum of total counts before: 49983776.0"/>
505 </output> 650 <has_text_matching expression="Sum of total counts after: 20000"/>
506 <output name="anndata_out" file="pp.downsample_counts.random-randint.h5ad" ftype="h5ad" compare="sim_size" delta="10000000" delta_frac="0.5"/> 651 </assert_contents>
507 </test> 652 </output>
508 <test expect_num_outputs="3"> 653 <output name="anndata_out" ftype="h5ad">
509 <!-- test 10 --> 654 <assert_contents>
510 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad" /> 655 <has_h5_keys keys="var/index"/>
656 </assert_contents>
657 </output>
658 </test>
659
660 <!-- test 10 -->
661 <test expect_num_outputs="2">
662 <param name="adata" value="random-randint.h5ad"/>
663 <conditional name="method">
664 <param name="method" value="pp.downsample_counts"/>
665 <param name="counts_per_cell" value="20000"/>
666 </conditional>
667 <section name="advanced_common">
668 <param name="show_log" value="true"/>
669 </section>
670 <output name="hidden_output">
671 <assert_contents>
672 <has_text_matching expression="sc.pp.downsample_counts"/>
673 <has_text_matching expression="counts_per_cell=20000"/>
674 <has_text_matching expression="random_state=0"/>
675 <has_text_matching expression="replace=False"/>
676 <has_text_matching expression="Sum of counts for the first cell before: 489934.0"/>
677 <has_text_matching expression="Sum of counts for the last cell before: 503669.0"/>
678 <has_text_matching expression="Sum of counts for the first cell after: 20000.0"/>
679 <has_text_matching expression="Sum of counts for the last cell after: 20000.0"/>
680 </assert_contents>
681 </output>
682 <output name="anndata_out" ftype="h5ad">
683 <assert_contents>
684 <has_h5_keys keys="var/index"/>
685 </assert_contents>
686 </output>
687 </test>
688
689 <!-- test 11 -->
690 <test expect_num_outputs="2">
691 <param name="adata" value="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1.h5ad"/>
511 <conditional name="method"> 692 <conditional name="method">
512 <param name="method" value="filter_marker"/> 693 <param name="method" value="filter_marker"/>
513 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/> 694 <param name="markerfile" value="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_1.tsv"/>
514 <param name="thresh_mean" value="1.0"/>
515 <param name="thresh_frac" value="0.2"/> 695 <param name="thresh_frac" value="0.2"/>
516 <param name="layer_selection" value="True"/> 696 <conditional name="layer_selection">
697 <param name="use_raw" value="True"/>
698 </conditional>
517 <param name="groupby" value="bulk_labels"/> 699 <param name="groupby" value="bulk_labels"/>
518 </conditional> 700 </conditional>
519 <section name="advanced_common"> 701 <section name="advanced_common">
520 <param name="show_log" value="true" /> 702 <param name="show_log" value="true"/>
521 </section> 703 </section>
522 <output name="hidden_output"> 704 <output name="hidden_output">
523 <assert_contents> 705 <assert_contents>
524 <has_text_matching expression="adata, key, x, 1.0, 0.2, 'bulk_labels'"/> 706 <has_text_matching expression="adata, key, x, 1.0, 0.2, 'bulk_labels'"/>
525 </assert_contents> 707 </assert_contents>
526 </output> 708 </output>
527 <output name="anndata_out" file="cosg.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_1_out.h5ad" ftype="h5ad"> 709 <output name="marker_out" ftype="tabular">
528 <assert_contents> 710 <assert_contents>
529 <has_h5_keys keys="obs, var, uns" /> 711 <has_text text="CD14+ Monocyte"/>
530 </assert_contents> 712 <has_text text="C9orf142"/>
531 </output> 713 <has_text text="EGR1"/>
532 <output name="marker_out" file="tl.rank_genes_groups.newton-cg.pbmc68k_highly_reduced_marker_filtered_1.tsv" ftype="tabular" compare="sim_size"/> 714 <has_text text="GZMB"/>
715 </assert_contents>
716 </output>
717 </test>
718
719 <!-- test 12 -->
720 <test expect_num_outputs="2">
721 <param name="adata" value="krumsiek11.h5ad"/>
722 <conditional name="method">
723 <param name="method" value="pp.scrublet"/>
724 <param name="n_prin_comps" value="5"/>
725 </conditional>
726 <section name="advanced_common">
727 <param name="show_log" value="true"/>
728 </section>
729 <output name="hidden_output">
730 <assert_contents>
731 <has_text_matching expression="sc.pp.scrublet"/>
732 <has_text_matching expression="sim_doublet_ratio=2.0"/>
733 <has_text_matching expression="expected_doublet_rate=0.05"/>
734 <has_text_matching expression="n_prin_comps=5"/>
735 </assert_contents>
736 </output>
737 <output name="anndata_out" ftype="h5ad">
738 <assert_contents>
739 <has_h5_keys keys="obs/doublet_score,obs/predicted_doublet"/>
740 <has_h5_keys keys="uns/scrublet"/>
741 </assert_contents>
742 </output>
533 </test> 743 </test>
534 </tests> 744 </tests>
535 <help><![CDATA[ 745 <help><![CDATA[
536 746
537 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`) 747 Filter cell outliers based on counts and numbers of genes expressed (`pp.filter_cells`)
587 ========================================== 797 ==========================================
588 798
589 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This 799 Downsample counts so that each cell has no more than `target_counts`. Cells with fewer counts than `target_counts` are unaffected by this. This
590 has been implemented by M. D. Luecken. 800 has been implemented by M. D. Luecken.
591 801
802 More details on the `scanpy documentation
803 <https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.downsample_counts.html>`__
592 804
593 Filter marker genes (`filter_marker`) 805 Filter marker genes (`filter_marker`)
594 ====================================================================== 806 =====================================
595 807
596 This option is specific for celltype marker gene detection. You can generate a celltype marker gene file (tsv) with **COSG** provided at Galaxy. 808 This option is specific for celltype marker gene detection. You can generate a celltype marker gene file (tsv) with **COSG** provided at Galaxy.
597 809
598 The marker gene file should have celltypes as rows and marker genes as columns. Each celltype can have a varying number of marker genes. 810 The marker gene file should have celltypes as rows and marker genes as columns. Each celltype can have a varying number of marker genes.
599 811
600 A marker gene is returned (retained in the list) if the mean expression of the marker gene is greater than the threshold of mean expression (thresh_mean) and if the fraction of cells with the marker gene expression is equal to or higher than the cell fraction threshold (thresh_frac). 812 A marker gene is returned (retained in the list) if the mean expression of the marker gene is greater than the threshold of mean expression (thresh_mean) and if the fraction of cells with the marker gene expression is equal to or higher than the cell fraction threshold (thresh_frac).
601 813
602 More details on the `scanpy documentation 814 More details on the `scanpy documentation
603 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__ 815 <https://scanpy.readthedocs.io/en/stable/api/scanpy.pp.downsample_counts.html>`__
604 816
817
818 Predict cell doublets using a nearest-neighbor classifier of observed transcriptomes and simulated doublets (`pp.scrublet`)
819 ============================================================================================================================
820
821 Works best if the input is a raw (unnormalized) counts matrix from a single sample or a collection of similar samples from the same experiment. This function is a wrapper around functions that pre-process using Scanpy and directly call functions of Scrublet().
822
823 More details on the `scanpy documentation
824 <https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.scrublet.html>`__
605 825
606 ]]></help> 826 ]]></help>
607 <expand macro="citations"/> 827 <expand macro="citations"/>
608 </tool> 828 </tool>