Mercurial > repos > estrain > data_manager_fastani
changeset 0:5dda51264a2d draft
Uploaded
| author | estrain |
|---|---|
| date | Sun, 29 May 2022 19:01:15 +0000 |
| parents | |
| children | 0e80b0fc7351 |
| files | data_manager_fastani/data_manager/data_manager_fastani.py data_manager_fastani/data_manager/data_manager_fastani.xml data_manager_fastani/data_manager_conf.xml data_manager_fastani/test-data/fastani_databases.loc data_manager_fastani/tool-data/fastani_databases.loc data_manager_fastani/tool_data_table_conf.xml.sample data_manager_fastani/tool_data_table_conf.xml.test |
| diffstat | 7 files changed, 184 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# Errol Strain, estrain@gmail.com
# Database downloads for FastANI
# File: data_manager_fastani/data_manager/data_manager_fastani.py
"""Galaxy data manager script that downloads FastANI reference databases.

The script reads the Galaxy-provided JSON parameter file given by ``--out``,
downloads the requested database into the ``extra_files_path`` named in it,
and then overwrites that same file with the ``fastani`` data table entry.
"""

import sys
import os
import tempfile
import json
import re
import argparse
import subprocess


# FastANI databases from Kostas Lab
D1_URL = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"

# FastANI uses folder names in output. Creating user friendly names
# for fish pathogens (nuccore accession -> folder name).
VL_ACCESSIONS = {
    'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
    'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
    'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
    'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
    'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
    'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
    'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
    'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245',
}


def download_D1(output_directory):
    """Download and unpack the FastANI D1 archive.

    The archive is saved as ``D1.tar.gz`` inside *output_directory* and
    extracted there with the system ``tar`` command.

    Args:
        output_directory: Existing directory that receives the download.

    Returns:
        Path of the ``D1`` folder inside *output_directory*.

    Raises:
        requests.HTTPError: The server answered with an error status.
        subprocess.CalledProcessError: ``tar`` exited with a non-zero status.
    """
    # Imported here so the module can be loaded (e.g. for --help) even when
    # the third-party package is not installed.
    import requests

    archive_path = os.path.join(output_directory, D1_URL.split("/")[-1])

    # Stream to disk instead of holding the whole archive in memory, and
    # fail on HTTP errors rather than saving an error page as the archive.
    with requests.get(D1_URL, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(archive_path, "wb") as archive:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                archive.write(chunk)

    # "-C" extracts into the target directory without changing the process
    # working directory; check=True surfaces a failed extraction.
    subprocess.run(
        ["tar", "-xvzf", archive_path, "-C", output_directory],
        check=True,
    )

    return os.path.join(output_directory, "D1")


def download_VL(output_directory):
    """Fetch the VetLIRN fish pathogen genomes with ``efetch``.

    For every entry of ``VL_ACCESSIONS`` a folder named after the organism is
    created inside *output_directory* and the sequence is written to
    ``<folder>/<accession>.fasta``.

    Args:
        output_directory: Existing directory that receives the genome folders.

    Returns:
        *output_directory*, unchanged.

    Raises:
        subprocess.CalledProcessError: ``efetch`` exited with a non-zero status.
    """
    for accession, folder_name in VL_ACCESSIONS.items():
        genome_dir = os.path.join(output_directory, folder_name)
        os.makedirs(genome_dir, exist_ok=True)

        fasta_path = os.path.join(genome_dir, accession + ".fasta")
        with open(fasta_path, "w") as fasta:
            # Argument list (no shell) with stdout redirected to the file.
            subprocess.run(
                ["efetch", "-db", "nuccore", "-id", accession,
                 "-format", "fasta"],
                stdout=fasta,
                check=True,
            )

    return output_directory


def print_json(version, argspath, argsname, argsout):
    """Write the ``fastani`` data table entry as JSON.

    Args:
        version: Stored in the ``value`` column.
        argspath: Stored in the ``path`` column.
        argsname: Stored in the ``name`` column.
        argsout: Path of the JSON file to (over)write.
    """
    data_table_entry = {
        'data_tables': {
            'fastani': [
                {
                    "value": version,
                    "name": argsname,
                    "path": argspath,
                }
            ]
        }
    }

    with open(argsout, 'w') as fh:
        json.dump(data_table_entry, fh, indent=2, sort_keys=True)


def main():
    """Parse the command line, download the database and write the JSON."""
    parser = argparse.ArgumentParser(description='Download FastANI Databases')
    # "choices" rejects unknown database types up front with a usage error.
    parser.add_argument('--type', type=str, required=True, nargs=1,
                        choices=['D1', 'VL'], help='Database Type')
    parser.add_argument('--desc', type=str, required=True, nargs=1,
                        help='Database Description')
    parser.add_argument('--out', type=str, required=True, nargs=1,
                        help='output file')

    args = parser.parse_args()

    # The output file initially holds the job parameters as JSON.
    with open(args.out[0]) as fh:
        params = json.load(fh)

    output_directory = params['output_data'][0]['extra_files_path']
    os.makedirs(output_directory, exist_ok=True)

    # Both database types start from D1; "VL" adds the extra genomes inside
    # the extracted D1 folder.
    database_path = download_D1(output_directory)
    if args.type[0] == "VL":
        database_path = download_VL(database_path)
        version = "FastANI D1 + VetLIRN"
    else:
        version = "FastANI D1"

    print_json(version, database_path, args.desc[0], args.out[0])


if __name__ == "__main__":
    main()
<!-- File: data_manager_fastani/data_manager/data_manager_fastani.xml -->
<tool id="data_manager_fastani" name="FastANI Data Manager" tool_type="manage_data" version="0.0.1" profile="20.01">
    <requirements>
        <!-- The script imports "requests" and calls the "efetch" command.
             NOTE(review): confirm these pins against the channels in use. -->
        <requirement type="package" version="3.9">python</requirement>
        <requirement type="package" version="2.27.1">requests</requirement>
        <requirement type="package" version="16.2">entrez-direct</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/data_manager_fastani.py'
            --type '$input_source_selector'
            --desc '$desc'
            --out '$output_file'
    ]]></command>
    <inputs>
        <!-- Both parameters are top-level so the names used in the command
             above resolve directly. -->
        <param name="input_source_selector" type="select" label="Database Selection">
            <option value="D1">FastANI D1</option>
            <option value="VL">FastANI D1 + VetLIRN Fish Pathogens</option>
        </param>
        <param name="desc" type="text" label="Database Name">
            <!-- The name becomes part of the installed database path. -->
            <validator type="empty_field" message="A database name is required"/>
        </param>
        <!-- Informational list of databases that are already installed.
             Optional and without a no_options validator, so the data manager
             can still run while the table is empty. -->
        <param name="fastani_databases" label="Select a database" type="select" optional="true">
            <options from_data_table="fastani"/>
        </param>
    </inputs>
    <outputs>
        <data name="output_file" format="data_manager_json"/>
    </outputs>
    <tests>
        <test>
        </test>
    </tests>
    <help><![CDATA[
Downloads a FastANI reference database and registers it in the ``fastani`` data table.

* **FastANI D1** - the D1 archive from http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz
* **FastANI D1 + VetLIRN Fish Pathogens** - D1 plus additional fish pathogen genomes fetched from NCBI nuccore with ``efetch``.
    ]]></help>
    <citations>
        <citation type="doi">10.1038/s41467-018-07641-9</citation>
    </citations>
</tool>
<?xml version="1.0"?>
<!-- File: data_manager_fastani/data_manager_conf.xml -->
<data_managers>
    <data_manager tool_file="data_manager/data_manager_fastani.xml" id="data_manager_fastani">
        <data_table name="fastani">
            <output>
                <!-- Columns match the keys written by data_manager_fastani.py
                     and the "value, name, path" layout of the fastani table. -->
                <column name="value" />
                <column name="name" />
                <column name="path" output_ref="output_file">
                    <!-- Move the downloaded directory under the managed data
                         path and record its absolute location. -->
                    <move type="directory" relativize_symlinks="True">
                        <src>${path}</src>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">fastani/${name}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/fastani/${name}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/test-data/fastani_databases.loc Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of FastANI databases.
+#
+# The columns are:
+# value<tab>name<tab>path
+#
+# For example:
+# D1<tab>FastANI D1<tab>/tmp/tool-data/fastani/D1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool-data/fastani_databases.loc Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# This is a tab-separated file describing the location of FastANI databases.
+#
+# The columns are:
+# value<tab>name<tab>path
+#
+# For example:
+# D1<tab>FastANI D1<tab>/tmp/tool-data/fastani/D1
<!-- File: data_manager_fastani/tool_data_table_conf.xml.sample -->
<tables>
    <!-- Locations of FastANI databases; the path matches the
         tool-data/fastani_databases.loc file shipped with this repository. -->
    <table name="fastani" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, path</columns>
        <file path="tool-data/fastani_databases.loc" />
    </table>
</tables>
<!-- File: data_manager_fastani/tool_data_table_conf.xml.test -->
<tables>
    <!-- Test configuration; the path matches the
         test-data/fastani_databases.loc file shipped with this repository. -->
    <table name="fastani" comment_char="#" allow_duplicate_entries="False">
        <columns>value, name, path</columns>
        <file path="${__HERE__}/test-data/fastani_databases.loc" />
    </table>
</tables>
