changeset 20:1af0cdf9b1fa draft

Uploaded
author estrain
date Wed, 20 Jul 2022 21:54:50 +0000
parents ceda2bd3e41d
children 7fb415930708
files data_manager_fastani/data_manager/data_manager_fastani.py data_manager_fastani/data_manager/data_manager_fastani.xml data_manager_fastani/tool-data/accessions.csv
diffstat 3 files changed, 58 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager_fastani/data_manager/data_manager_fastani.py	Mon May 30 00:56:12 2022 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.py	Wed Jul 20 21:54:50 2022 +0000
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python
 # Errol Strain, estrain@gmail.com
 # Database downloads for FastANI 
 
@@ -9,6 +9,7 @@
 import re
 import argparse
 import requests
+import csv
 
 def download_D1(output_directory):
 
@@ -37,20 +38,24 @@
 
     baseurl="https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/"
 
-    urldict={'Aeromonas_caviae_strain_WP8_S18_ESBL_04':'Aeromonas_caviae/representative/GCF_014169735.1_ASM1416973v1/GCF_014169735.1_ASM1416973v1_genomic.fna.gz',
-     'Aeromonas_veronii_strain_FDAARGOS_632':'Aeromonas_veronii/representative/GCF_008693705.1_ASM869370v1/GCF_008693705.1_ASM869370v1_genomic.fna.gz',
-     'Aeromonas_sobria_strain_CECT_4245':'Aeromonas_sobria/representative/GCF_000820145.1_PRJEB7040/GCF_000820145.1_PRJEB7040_genomic.fna.gz',
-     'Edwardsiella_piscicida_strain_18EpOKYJ':'Edwardsiella_piscicida/representative/GCF_021733145.1_ASM2173314v1/GCF_021733145.1_ASM2173314v1_genomic.fna.gz',
-     'Vibrio_alginolyticus_FDAARGOS_97':'Vibrio_alginolyticus/representative/GCF_001471275.2_ASM147127v2/GCF_001471275.2_ASM147127v2_cds_from_genomic.fna.gz',
-     'Vibrio_harveyi_ATCC_33843':'Vibrio_harveyi/representative/GCF_000770115.1_ASM77011v2/GCF_000770115.1_ASM77011v2_genomic.fna.gz',
-     'Vibrio_rotiferianus_strain_B64D1':'Vibrio_rotiferianus/representative/GCF_002214395.1_ASM221439v1/GCF_002214395.1_ASM221439v1_genomic.fna.gz',
-     'Staphylococcus_warneri_strain_22_1':'Staphylococcus_warneri/representative/GCF_003571725.1_ASM357172v1/GCF_003571725.1_ASM357172v1_genomic.fna.gz'}
-   
+def download_VL(output_directory,accfile):
+
+    cwd = os.getcwd()
+    os.chdir(output_directory)
+
+    baseurl="https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/"
+
+    urldict = {}
+    with open(accfile,mode='r') as inp:
+      reader = csv.reader(inp)
+      urldict = {rows[0]:rows[1] for rows in reader}
+
     #FastANI uses filenames in output. Creating user friendly names
-    #for fish pathogens 
-    for key in urldict: 
-      url=baseurl+urldict[key]
-      filename = key + '.fna.gz' 
+    #for fish pathogens
+    for key in urldict:
+      url=baseurl+key+"/representative/"+urldict[key]+"/"+urldict[key]+"_genomic.fna.gz"
+      filename = key + '.fna.gz'
+      print(url+"\n")
       with open(filename, "wb") as f:
         r = requests.get(url)
         f.write(r.content)
@@ -84,6 +89,7 @@
     parser = argparse.ArgumentParser(description='Download FastANI Databases')
     parser.add_argument('--type', type=str, required=True, nargs=1, help='Database Type')
     parser.add_argument('--name', type=str, required=True, nargs=1, help='Unique Database Folder Name')
+    parser.add_argument('--acc', type=str, required=True, nargs=1, help='CSV Accession file with NCBI ftp folder IDs, see tool-data')
     parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
 
     args = parser.parse_args()
@@ -101,7 +107,7 @@
       version="FastANI D1"
     elif(args.type[0]=="VL"): 
       download_D1(output_directory)    
-      download_VL(output_directory)    
+      download_VL(output_directory,args.acc[0])    
       version="FastANI D1 + VetLIRN"
       
     print_json(version,output_directory,args.name[0],args.out[0])
--- a/data_manager_fastani/data_manager/data_manager_fastani.xml	Mon May 30 00:56:12 2022 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.xml	Wed Jul 20 21:54:50 2022 +0000
@@ -2,7 +2,7 @@
     <requirements>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        python $__tool_directory__/data_manager_fastani.py --type $input_source_selector --name $name --out ${output_file};
+        python $__tool_directory__/data_manager_fastani.py --type $input_source_selector --name $name --acc ../accessions.csv --out ${output_file};
     ]]></command>
     <inputs>
         <param name="input_source_selector" type="select" label="Database Selection">
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool-data/accessions.csv	Wed Jul 20 21:54:50 2022 +0000
@@ -0,0 +1,36 @@
+Aeromonas_allosaccharophila,GCF_016026615.1_ASM1602661v1
+Aeromonas_aquatica,GCF_000764655.1_ASM76465v1
+Aeromonas_australiensis,GCF_000819725.1_PRJEB7021
+Aeromonas_bestiarum,GCF_002906925.1_ASM290692v1
+Aeromonas_bivalvium,GCF_003265465.1_ASM326546v1
+Aeromonas_cavernicola,GCF_002795305.1_ASM279530v1
+Aeromonas_caviae,GCF_014169735.1_ASM1416973v1
+Aeromonas_dhakensis,GCF_020405345.1_ASM2040534v1
+Aeromonas_diversa,GCF_000819805.1_PRJEB7026
+Aeromonas_encheleia,GCF_900637545.1_51438_G01
+Aeromonas_enteropelogenes,GCF_020341435.1_ASM2034143v1
+Aeromonas_eucrenophila,GCF_000819865.1_PRJEB7029
+Aeromonas_finlandensis,GCF_000764645.1_ASM76464v1
+Aeromonas_fluvialis,GCF_000819885.1_PRJEB7030
+Aeromonas_hydrophila,GCF_017310215.1_ASM1731021v1
+Aeromonas_jandaei,GCF_016127195.1_ASM1612719v1
+Aeromonas_lacus,GCF_000764665.1_ASM76466v1
+Aeromonas_lusitana,GCF_002812985.1_MDC2473
+Aeromonas_media,GCF_020423125.1_ASM2042312v1
+Aeromonas_molluscorum,GCF_000388115.1_Amol1.0
+Aeromonas_piscicola,GCF_000820005.1_PRJEB7033
+Aeromonas_popoffii,GCF_000820025.1_PRJEB7034
+Aeromonas_rivipollensis,GCF_010974825.1_ASM1097482v1
+Aeromonas_rivuli,GCF_020149575.1_ASM2014957v1
+Aeromonas_sanarellii,GCF_000820085.1_PRJEB7037
+Aeromonas_schubertii,GCF_001481395.1_ASM148139v1
+Aeromonas_simiae,GCF_014892695.1_ASM1489269v1
+Aeromonas_sobria,GCF_000820145.1_PRJEB7040
+Aeromonas_taiwanensis,GCF_000820165.1_PRJEB7041
+Aeromonas_tecta,GCF_000820185.1_PRJEB7042
+Aeromonas_veronii,GCF_008693705.1_ASM869370v1
+Edwardsiella_piscicida,GCF_021733145.1_ASM2173314v1
+Staphylococcus_warneri,GCF_003571725.1_ASM357172v1
+Vibrio_alginolyticus,GCF_001471275.2_ASM147127v2
+Vibrio_harveyi,GCF_000770115.1_ASM77011v2
+Vibrio_rotiferianus,GCF_002214395.1_ASM221439v1