# HG changeset patch
# User iuc
# Date 1766769411 0
# Node ID 35d32c807c234fe464ce26915c1c39ff16cfadad
# Parent ced734560c9d788edf7b4cd921195f701e8efc32
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_datasets commit 5a65a62588a36d757f96681bf72f537c12c91beb
diff -r ced734560c9d -r 35d32c807c23 datasets_gene.xml
--- a/datasets_gene.xml Mon Mar 17 11:05:34 2025 +0000
+++ b/datasets_gene.xml Fri Dec 26 17:16:51 2025 +0000
@@ -4,7 +4,7 @@
macros.xml
-
+
-
-
+
+
@@ -109,8 +109,8 @@
-
-
+
+
@@ -133,7 +133,7 @@
-
+
@@ -209,7 +209,7 @@
file_choices['kingdom_cond']['include'] and "cds" in file_choices['kingdom_cond']['include']
- file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']
+ file_choices['kingdom_cond']['include'] and "3p-utr" in file_choices['kingdom_cond']['include']
file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']
@@ -235,12 +235,12 @@
@@ -263,12 +263,12 @@
@@ -284,6 +284,7 @@
@@ -297,17 +298,17 @@
-
+
@@ -315,11 +316,6 @@
-
-
+
@@ -380,11 +376,6 @@
-
-
+
@@ -403,11 +394,6 @@
-
-
+
@@ -426,11 +412,6 @@
-
-
@@ -466,24 +446,23 @@
-
-
+
-
-
-
+**Outputs (Eukaryote)**
+
+- **Gene Data Report**: Tabular metadata (ID, symbol, description, coordinates)
+- **Gene Product Report**: Detailed transcript/protein information
+- **Sequences**: Gene, RNA, protein, CDS, 5'/3' UTR FASTA files
+
+**Outputs (Prokaryote)**
+
+Prokaryotic genes (WP\_ accessions) use a different report format that contains:
+accession, description, EC number, gene symbol, protein info.
+
+**Examples**
+
+Download human BRCA1::
+
+ Query by: Gene ID
+ Gene ID: 672
+
+Download TP53 orthologs in rodents::
+
+ Query by: Symbol
+ Symbol: tp53
+ Ortholog: rodentia
+
+
+.. _datasets: https://www.ncbi.nlm.nih.gov/datasets/
+]]>
diff -r ced734560c9d -r 35d32c807c23 datasets_genome.xml
--- a/datasets_genome.xml Mon Mar 17 11:05:34 2025 +0000
+++ b/datasets_genome.xml Fri Dec 26 17:16:51 2025 +0000
@@ -4,9 +4,14 @@
macros.xml
-
+
-
+
+
+
+
+
-
@@ -137,35 +141,35 @@
-
+
file_choices['include'] and "seq-report" in file_choices['include']
-
+
file_choices['include'] and "genome" in file_choices['include']
-
+
file_choices['include'] and "rna" in file_choices['include']
-
+
file_choices['include'] and "protein" in file_choices['include']
-
+
file_choices['include'] and "cds" in file_choices['include']
-
+
file_choices['include'] and "gff3" in file_choices['include']
-
+
file_choices['include'] and "gtf" in file_choices['include']
-
+
file_choices['include'] and "gbff" in file_choices['include']
@@ -175,8 +179,10 @@
-
-
+
-
+
-
+
@@ -212,28 +218,25 @@
-
+
-
-
-
+
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
@@ -365,8 +373,10 @@
-
-
+
@@ -395,19 +405,19 @@
-
+
-
+
-
+
@@ -437,7 +447,7 @@
-
+
@@ -463,44 +473,90 @@
-
-
+
+
-
+
-
+
+
+
+
+
+
+
+
+
+
-
-
-
+**Note**: The "Reference only" filter returns only RefSeq (GCF\_) assemblies.
+If a taxon has only GenBank (GCA\_) assemblies, this filter will return no results
+with a misleading error message. This is an NCBI datasets bug (not a Galaxy bug).
+
+**Outputs**
+
+- **Data Report**: Tabular metadata for matching assemblies
+- **Genome FASTA**: Genomic sequences (nested collection by accession)
+- **Annotation files**: GFF3, GTF, GenBank flat files
+- **Protein/RNA/CDS**: Amino acid and nucleotide sequences
+- **Sequence Report**: Per-sequence metadata (chromosome, length, etc.)
+
+.. _datasets: https://www.ncbi.nlm.nih.gov/datasets/
+
+]]>
diff -r ced734560c9d -r 35d32c807c23 macros.xml
--- a/macros.xml Mon Mar 17 11:05:34 2025 +0000
+++ b/macros.xml Fri Dec 26 17:16:51 2025 +0000
@@ -1,5 +1,5 @@
- 17.1.0
+ 18.13.0
0
23.0
MIT
@@ -11,8 +11,9 @@
ncbi-datasets-cli
- ca-certificates
- unzip
+ ca-certificates
+
+
@@ -94,7 +95,7 @@
-
+
@@ -104,7 +105,7 @@
-
+
@@ -401,11 +402,8 @@
- @misc{NCBI,
- author = "{NCBI}",
- title = "NCBI Datasets",
- year = "2022",
- url = "https://github.com/ncbi/datasets"}
+
+ 10.1038/s41597-024-03571-y
diff -r ced734560c9d -r 35d32c807c23 test-data/GCF_000007445.1.genomic.gtf
--- a/test-data/GCF_000007445.1.genomic.gtf Mon Mar 17 11:05:34 2025 +0000
+++ b/test-data/GCF_000007445.1.genomic.gtf Fri Dec 26 17:16:51 2025 +0000
@@ -1,6 +1,5 @@
-#!annotation-source NCBI RefSeq
NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491";
-NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
+NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002";
diff -r ced734560c9d -r 35d32c807c23 test-data/genome.2.GCF_000013305.1.genomic.gtf
--- a/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Mar 17 11:05:34 2025 +0000
+++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Fri Dec 26 17:16:51 2025 +0000
@@ -1,6 +1,5 @@
-#!annotation-source NCBI RefSeq
NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001";
-NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
-NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11";
+NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
+NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1";
NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002";