# HG changeset patch
# User estrain
# Date 1658354090 0
# Node ID 1af0cdf9b1fa24b78f7bc29922d58f6ac8b4d746
# Parent ceda2bd3e41d1d9b26523c65feaa7b845236be99
Uploaded
diff -r ceda2bd3e41d -r 1af0cdf9b1fa data_manager_fastani/data_manager/data_manager_fastani.py
--- a/data_manager_fastani/data_manager/data_manager_fastani.py Mon May 30 00:56:12 2022 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.py Wed Jul 20 21:54:50 2022 +0000
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for FastANI
@@ -9,6 +9,7 @@
import re
import argparse
import requests
+import csv
def download_D1(output_directory):
@@ -37,20 +38,24 @@
baseurl="https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/"
- urldict={'Aeromonas_caviae_strain_WP8_S18_ESBL_04':'Aeromonas_caviae/representative/GCF_014169735.1_ASM1416973v1/GCF_014169735.1_ASM1416973v1_genomic.fna.gz',
- 'Aeromonas_veronii_strain_FDAARGOS_632':'Aeromonas_veronii/representative/GCF_008693705.1_ASM869370v1/GCF_008693705.1_ASM869370v1_genomic.fna.gz',
- 'Aeromonas_sobria_strain_CECT_4245':'Aeromonas_sobria/representative/GCF_000820145.1_PRJEB7040/GCF_000820145.1_PRJEB7040_genomic.fna.gz',
- 'Edwardsiella_piscicida_strain_18EpOKYJ':'Edwardsiella_piscicida/representative/GCF_021733145.1_ASM2173314v1/GCF_021733145.1_ASM2173314v1_genomic.fna.gz',
- 'Vibrio_alginolyticus_FDAARGOS_97':'Vibrio_alginolyticus/representative/GCF_001471275.2_ASM147127v2/GCF_001471275.2_ASM147127v2_cds_from_genomic.fna.gz',
- 'Vibrio_harveyi_ATCC_33843':'Vibrio_harveyi/representative/GCF_000770115.1_ASM77011v2/GCF_000770115.1_ASM77011v2_genomic.fna.gz',
- 'Vibrio_rotiferianus_strain_B64D1':'Vibrio_rotiferianus/representative/GCF_002214395.1_ASM221439v1/GCF_002214395.1_ASM221439v1_genomic.fna.gz',
- 'Staphylococcus_warneri_strain_22_1':'Staphylococcus_warneri/representative/GCF_003571725.1_ASM357172v1/GCF_003571725.1_ASM357172v1_genomic.fna.gz'}
-
+def download_VL(output_directory,accfile):
+
+ cwd = os.getcwd()
+ os.chdir(output_directory)
+
+ baseurl="https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/"
+
+ urldict = {}
+ with open(accfile,mode='r') as inp:
+ reader = csv.reader(inp)
+ urldict = {rows[0]:rows[1] for rows in reader}
+
#FastANI uses filenames in output. Creating user friendly names
- #for fish pathogens
- for key in urldict:
- url=baseurl+urldict[key]
- filename = key + '.fna.gz'
+ #for fish pathogens
+ for key in urldict:
+ url=baseurl+key+"/representative/"+urldict[key]+"/"+urldict[key]+"_genomic.fna.gz"
+ filename = key + '.fna.gz'
+ print(url+"\n")
with open(filename, "wb") as f:
r = requests.get(url)
f.write(r.content)
@@ -84,6 +89,7 @@
parser = argparse.ArgumentParser(description='Download FastANI Databases')
parser.add_argument('--type', type=str, required=True, nargs=1, help='Database Type')
parser.add_argument('--name', type=str, required=True, nargs=1, help='Unique Database Folder Name')
+ parser.add_argument('--acc', type=str, required=True, nargs=1, help='CSV Accession file with NCBI ftp folder IDs, see tool-data')
parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
args = parser.parse_args()
@@ -101,7 +107,7 @@
version="FastANI D1"
elif(args.type[0]=="VL"):
download_D1(output_directory)
- download_VL(output_directory)
+ download_VL(output_directory,args.acc[0])
version="FastANI D1 + VetLIRN"
print_json(version,output_directory,args.name[0],args.out[0])
diff -r ceda2bd3e41d -r 1af0cdf9b1fa data_manager_fastani/data_manager/data_manager_fastani.xml
--- a/data_manager_fastani/data_manager/data_manager_fastani.xml Mon May 30 00:56:12 2022 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.xml Wed Jul 20 21:54:50 2022 +0000
@@ -2,7 +2,7 @@
diff -r ceda2bd3e41d -r 1af0cdf9b1fa data_manager_fastani/tool-data/accessions.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool-data/accessions.csv Wed Jul 20 21:54:50 2022 +0000
@@ -0,0 +1,36 @@
+Aeromonas_allosaccharophila,GCF_016026615.1_ASM1602661v1
+Aeromonas_aquatica,GCF_000764655.1_ASM76465v1
+Aeromonas_australiensis,GCF_000819725.1_PRJEB7021
+Aeromonas_bestiarum,GCF_002906925.1_ASM290692v1
+Aeromonas_bivalvium,GCF_003265465.1_ASM326546v1
+Aeromonas_cavernicola,GCF_002795305.1_ASM279530v1
+Aeromonas_caviae,GCF_014169735.1_ASM1416973v1
+Aeromonas_dhakensis,GCF_020405345.1_ASM2040534v1
+Aeromonas_diversa,GCF_000819805.1_PRJEB7026
+Aeromonas_encheleia,GCF_900637545.1_51438_G01
+Aeromonas_enteropelogenes,GCF_020341435.1_ASM2034143v1
+Aeromonas_eucrenophila,GCF_000819865.1_PRJEB7029
+Aeromonas_finlandensis,GCF_000764645.1_ASM76464v1
+Aeromonas_fluvialis,GCF_000819885.1_PRJEB7030
+Aeromonas_hydrophila,GCF_017310215.1_ASM1731021v1
+Aeromonas_jandaei,GCF_016127195.1_ASM1612719v1
+Aeromonas_lacus,GCF_000764665.1_ASM76466v1
+Aeromonas_lusitana,GCF_002812985.1_MDC2473
+Aeromonas_media,GCF_020423125.1_ASM2042312v1
+Aeromonas_molluscorum,GCF_000388115.1_Amol1.0
+Aeromonas_piscicola,GCF_000820005.1_PRJEB7033
+Aeromonas_popoffii,GCF_000820025.1_PRJEB7034
+Aeromonas_rivipollensis,GCF_010974825.1_ASM1097482v1
+Aeromonas_rivuli,GCF_020149575.1_ASM2014957v1
+Aeromonas_sanarellii,GCF_000820085.1_PRJEB7037
+Aeromonas_schubertii,GCF_001481395.1_ASM148139v1
+Aeromonas_simiae,GCF_014892695.1_ASM1489269v1
+Aeromonas_sobria,GCF_000820145.1_PRJEB7040
+Aeromonas_taiwanensis,GCF_000820165.1_PRJEB7041
+Aeromonas_tecta,GCF_000820185.1_PRJEB7042
+Aeromonas_veronii,GCF_008693705.1_ASM869370v1
+Edwardsiella_piscicida,GCF_021733145.1_ASM2173314v1
+Staphylococcus_warneri,GCF_003571725.1_ASM357172v1
+Vibrio_alginolyticus,GCF_001471275.2_ASM147127v2
+Vibrio_harveyi,GCF_000770115.1_ASM77011v2
+Vibrio_rotiferianus,GCF_002214395.1_ASM221439v1