Mercurial > repos > iuc > ncbi_datasets
diff datasets_genome.xml @ 16:9a10a6449901 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit 1c7abf3293422fb432b5acd2ef178e7536d65f0b
| author | iuc |
|---|---|
| date | Fri, 21 Feb 2025 18:43:58 +0000 |
| parents | dfad868c911b |
| children | 35d32c807c23 |
line wrap: on
line diff
--- a/datasets_genome.xml Thu Sep 21 23:02:45 2023 +0000 +++ b/datasets_genome.xml Fri Feb 21 18:43:58 2025 +0000 @@ -33,6 +33,10 @@ --chromosomes '$filters.chromosomes' #end if $filters.exclude_atypical +#if $filters.mag: + --mag '$filters.mag' +#end if + @INCLUDE@ @RELEASED_BEFORE@ @RELEASED_AFTER@ @@ -51,7 +55,7 @@ ## unzip and rehydrate if any data is to be downloaded (include is not None) #if $file_choices.include ## unzip - && 7z x -y ncbi_dataset.zip > 7z.log + && unzip ncbi_dataset.zip ## rehydrate && datasets rehydrate @@ -106,6 +110,10 @@ <expand macro="assembly_source"/> <expand macro="chromosomes"/> <param argument="--exclude-atypical" type="boolean" truevalue="--exclude-atypical" falsevalue="" label="Exclude atypical assemblies"/> + <param argument="--mag" type="select" multiple="false" optional="true" label="Filter metagenome assembled genomes (MAGs)"> + <option value="only" selected="false">Limit to MAGs</option> + <option value="exclude" selected="false">Exclude MAGs</option> + </param> <expand macro="released_options"/> <expand macro="released_options" before_or_after="after"/> @@ -254,7 +262,7 @@ <param name="decompress" value="true"/> </section> <assert_stderr> - <has_text text="No assemblies found that match selection"/> + <has_text text="no genome assemblies were found"/> </assert_stderr> <!-- In the current state of the NCBI tool/DB, no output to check. But the returned results seem to change from time to time and it might @@ -297,14 +305,14 @@ <assert_contents> <has_text text="GCF_000007445.1"/> <has_n_lines n="2"/> - <has_n_columns n="14"/> + <has_n_columns n="15"/> </assert_contents> </element> <element name="GCF_000013305.1"> <assert_contents> <has_text text="GCF_000013305.1"/> <has_n_lines n="2"/> - <has_n_columns n="14"/> + <has_n_columns n="15"/> </assert_contents> </element> </output_collection> @@ -332,8 +340,8 @@ </section> <output name="genome_data_report"> <assert_contents> - <has_text text="GCF_000013305.1"/> - <has_text text="GCF_000007445.1"/> + <has_text text="GCF_000013305.1"/> + <has_text text="GCF_000007445.1"/> <has_n_lines n="3"/> <has_n_columns n="4"/> </assert_contents> @@ -349,7 +357,7 @@ </test> <!-- should not fail https://github.com/ncbi/datasets/issues/194 --> - <test expect_num_outputs="2"> <!-- expect_failure="true"> --> + <test expect_num_outputs="2"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> @@ -362,9 +370,17 @@ <section name="file_choices"> <param name="include" value="seq-report"/> </section> - <!-- - <output_collection name="sequence_report" type="list" count="4" > - --> + <output name="genome_data_report"> + <!-- assert that we get at least the 16 versions available at the time of writing this test --> + <assert_contents> + <has_text text="GCF_000001405" min="16"/> + <has_n_lines min="16"/> + <has_n_columns n="4"/> + </assert_contents> + </output> + <!--not testing the collection output. the count will change over time + and this can't be tested for at the moment + <output_collection name="sequence_report" type="list" count="16"/> --> </test> <test expect_num_outputs="5"> <conditional name="query|subcommand"> @@ -413,7 +429,7 @@ <element name="GCF_000146045.2"> <element name="GCF_000146045.2_R64" ftype="fasta.gz"> <assert_contents> - <has_size value="3843460"/> + <has_size value="3843460" delta="2000"/> </assert_contents> </element> </element> @@ -421,14 +437,14 @@ <output_collection name="protein_fasta" type="list" count="1"> <element name="GCF_000146045.2" ftype="fasta.gz"> <assert_contents> - <has_size value="1845038"/> + <has_size value="1845038" delta="2000"/> </assert_contents> </element> </output_collection> <output_collection name="rna_fasta" type="list" count="1"> <element name="GCF_000146045.2" ftype="fasta.gz"> <assert_contents> - <has_size value="2784899"/> + <has_size min="2700000" max="2800000"/> </assert_contents> </element> </output_collection> @@ -447,14 +463,13 @@ </section> <output_collection name="sequence_report" type="list" count="2"/> <output_collection name="genome_fasta" type="list:list" count="2"> - <expand macro="genome_fasta_assert" el1="GCF_000002945.1" el2="GCF_000002945.1_ASM294v2" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="4"/> - <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc] (mitochondrion|chromosome .*), complete (sequence|genome)" expression_n="17"/> + <expand macro="genome_fasta_assert" el1="GCF_000002945.2" el2="GCF_000002945.2_ASM294v3" expression=">NC_[0-9]+\.[0-9]+ Schizosaccharomyces pombe.*" expression_n="4"/> + <expand macro="genome_fasta_assert" el1="GCF_000146045.2" el2="GCF_000146045.2_R64" expression=">NC_[0-9]+\.[0-9]+ Saccharomyces cerevisiae S288[Cc].*" expression_n="17"/> </output_collection> </test> - <!-- tax_exact_match seems not able to filter out strains - https://github.com/ncbi/datasets/issues/187 - hence we set expect_test_failure="true"--> - <test expect_num_outputs="1" expect_test_failure="true"> + <!-- tax_exact_match should filter out strains + https://github.com/ncbi/datasets/issues/187 --> + <test expect_num_outputs="1"> <conditional name="query|subcommand"> <param name="download_by" value="taxon"/> <param name="taxon_positional" value="4932"/>
