# HG changeset patch # User iuc # Date 1766769411 0 # Node ID 35d32c807c234fe464ce26915c1c39ff16cfadad # Parent ced734560c9d788edf7b4cd921195f701e8efc32 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_datasets commit 5a65a62588a36d757f96681bf72f537c12c91beb diff -r ced734560c9d -r 35d32c807c23 datasets_gene.xml --- a/datasets_gene.xml Mon Mar 17 11:05:34 2025 +0000 +++ b/datasets_gene.xml Fri Dec 26 17:16:51 2025 +0000 @@ -4,7 +4,7 @@ macros.xml - + - - + + @@ -109,8 +109,8 @@ - - + + @@ -133,7 +133,7 @@ - + @@ -209,7 +209,7 @@ file_choices['kingdom_cond']['include'] and "cds" in file_choices['kingdom_cond']['include'] - file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include'] + file_choices['kingdom_cond']['include'] and "3p-utr" in file_choices['kingdom_cond']['include'] file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include'] @@ -235,12 +235,12 @@ - + - + @@ -263,12 +263,12 @@ - + - + @@ -284,6 +284,7 @@
+
@@ -297,17 +298,17 @@ - + - + - + @@ -315,11 +316,6 @@ -
- - - -
@@ -361,17 +357,17 @@ - + - +
- + @@ -380,11 +376,6 @@ -
- - - -
@@ -395,7 +386,7 @@
- + @@ -403,11 +394,6 @@ -
- - - -
@@ -417,7 +403,7 @@
- + @@ -426,11 +412,6 @@ -
- - - -
@@ -439,7 +420,6 @@
- @@ -466,24 +446,23 @@ - + - + - + - - +
- - - +**Outputs (Eukaryote)** + +- **Gene Data Report**: Tabular metadata (ID, symbol, description, coordinates) +- **Gene Product Report**: Detailed transcript/protein information +- **Sequences**: Gene, RNA, protein, CDS, 5'/3' UTR FASTA files + +**Outputs (Prokaryote)** + +Prokaryotic genes (WP\_ accessions) use a different report format with: +accession, description, EC number, gene symbol, protein info. + +**Examples** + +Download human BRCA1:: + + Query by: Gene ID + Gene ID: 672 + +Download TP53 orthologs in rodents:: + + Query by: Symbol + Symbol: tp53 + Ortholog: rodentia + + +.. _datasets: https://www.ncbi.nlm.nih.gov/datasets/ +]]> diff -r ced734560c9d -r 35d32c807c23 datasets_genome.xml --- a/datasets_genome.xml Mon Mar 17 11:05:34 2025 +0000 +++ b/datasets_genome.xml Fri Dec 26 17:16:51 2025 +0000 @@ -4,9 +4,14 @@ macros.xml - + - + + + + + - @@ -137,35 +141,35 @@ - + file_choices['include'] and "seq-report" in file_choices['include'] - + file_choices['include'] and "genome" in file_choices['include'] - + file_choices['include'] and "rna" in file_choices['include'] - + file_choices['include'] and "protein" in file_choices['include'] - + file_choices['include'] and "cds" in file_choices['include'] - + file_choices['include'] and "gff3" in file_choices['include'] - + file_choices['include'] and "gtf" in file_choices['include'] - + file_choices['include'] and "gbff" in file_choices['include'] @@ -175,8 +179,10 @@ - - +
+ + +
@@ -184,15 +190,15 @@
- - + + - + - + @@ -212,28 +218,25 @@ - + - - - +
+ +
- - - - - - - - - - - - - + + + + + + + + + + @@ -365,8 +373,10 @@ - - +
+ + +
@@ -395,19 +405,19 @@ - + - + - + @@ -437,7 +447,7 @@ - + @@ -463,44 +473,90 @@ - - + +
- + - - + + + + + + + + + + +
+ + + +
+
+ +
+ + +
- - - +**Note**: The "Reference only" filter returns only RefSeq (GCF\_) assemblies. +If a taxon has only GenBank (GCA\_) assemblies, this filter will return no results +with a misleading error message. It is a NCBI datasets bug (not a Galaxy bug). + +**Outputs** + +- **Data Report**: Tabular metadata for matching assemblies +- **Genome FASTA**: Genomic sequences (nested collection by accession) +- **Annotation files**: GFF3, GTF, GenBank flat files +- **Protein/RNA/CDS**: Amino acid and nucleotide sequences +- **Sequence Report**: Per-sequence metadata (chromosome, length, etc.) + +.. _datasets: https://www.ncbi.nlm.nih.gov/datasets/ + +]]> diff -r ced734560c9d -r 35d32c807c23 macros.xml --- a/macros.xml Mon Mar 17 11:05:34 2025 +0000 +++ b/macros.xml Fri Dec 26 17:16:51 2025 +0000 @@ -1,5 +1,5 @@ - 17.1.0 + 18.13.0 0 23.0 MIT @@ -11,8 +11,9 @@ ncbi-datasets-cli - ca-certificates - unzip + ca-certificates + + @@ -94,7 +95,7 @@ - + @@ -104,7 +105,7 @@ - + @@ -401,11 +402,8 @@ - @misc{NCBI, - author = "{NCBI}", - title = "NCBI Datasets", - year = "2022", - url = "https://github.com/ncbi/datasets"} + + 10.1038/s41597-024-03571-y diff -r ced734560c9d -r 35d32c807c23 test-data/GCF_000007445.1.genomic.gtf --- a/test-data/GCF_000007445.1.genomic.gtf Mon Mar 17 11:05:34 2025 +0000 +++ b/test-data/GCF_000007445.1.genomic.gtf Fri Dec 26 17:16:51 2025 +0000 @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_004431.1 RefSeq gene 190 255 . + . gene_id "C_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "C_RS00005"; old_locus_tag "c5491"; -NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_004431.1 Protein Homology CDS 190 252 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology start_codon 190 192 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_004431.1 Protein Homology stop_codon 253 255 . + 0 gene_id "C_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "C_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_004431.1 RefSeq gene 453 911 . + . gene_id "C_RS00010"; transcript_id ""; gbkey "Gene"; gene "tnpA"; gene_biotype "protein_coding"; locus_tag "C_RS00010"; old_locus_tag "c0002"; diff -r ced734560c9d -r 35d32c807c23 test-data/genome.2.GCF_000013305.1.genomic.gtf --- a/test-data/genome.2.GCF_000013305.1.genomic.gtf Mon Mar 17 11:05:34 2025 +0000 +++ b/test-data/genome.2.GCF_000013305.1.genomic.gtf Fri Dec 26 17:16:51 2025 +0000 @@ -1,6 +1,5 @@ -#!annotation-source NCBI RefSeq NC_008253.1 RefSeq gene 190 255 . + . gene_id "ECP_RS00005"; transcript_id ""; gbkey "Gene"; gene "thrL"; gene_biotype "protein_coding"; locus_tag "ECP_RS00005"; old_locus_tag "ECP_0001"; -NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; -NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; +NC_008253.1 Protein Homology CDS 190 252 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology start_codon 190 192 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; +NC_008253.1 Protein Homology stop_codon 253 255 . + 0 gene_id "ECP_RS00005"; transcript_id "unassigned_transcript_1"; db_xref "GenBank:WP_001386572.1"; gbkey "CDS"; gene "thrL"; inference "COORDINATES: similar to AA sequence:RefSeq:NP_414542.1"; locus_tag "ECP_RS00005"; product "thr operon leader peptide"; protein_id "WP_001386572.1"; transl_table "11"; exon_number "1"; NC_008253.1 RefSeq gene 336 2798 . + . gene_id "ECP_RS00010"; transcript_id ""; gbkey "Gene"; gene "thrA"; gene_biotype "protein_coding"; locus_tag "ECP_RS00010"; old_locus_tag "ECP_0002";