changeset 0:5dda51264a2d draft

Uploaded
author estrain
date Sun, 29 May 2022 19:01:15 +0000
parents
children 0e80b0fc7351
files data_manager_fastani/data_manager/data_manager_fastani.py data_manager_fastani/data_manager/data_manager_fastani.xml data_manager_fastani/data_manager_conf.xml data_manager_fastani/test-data/fastani_databases.loc data_manager_fastani/tool-data/fastani_databases.loc data_manager_fastani/tool_data_table_conf.xml.sample data_manager_fastani/tool_data_table_conf.xml.test
diffstat 7 files changed, 184 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.py	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# Errol Strain, estrain@gmail.com
+# Database downloads for FastANI 
+
+import sys
+import os
+import tempfile
+import json
+import re
+import argparse
+import requests
+
+
+def download_D1(output_directory):
+    """Download the FastANI D1 genome archive and unpack it in output_directory.
+
+    Returns the path of the extracted ``D1`` folder. Raises a requests
+    exception if the download fails and CalledProcessError if tar fails.
+    """
+    import subprocess  # function-scope import: not among the module-level imports
+    # FastANI databases from Kostas Lab
+    url = "http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz"
+    archive = os.path.join(output_directory, url.split("/")[-1])
+    with requests.get(url, stream=True) as response:
+        response.raise_for_status()  # never save an HTTP error page as the archive
+        with open(archive, "wb") as f:
+            # Stream in 1 MiB chunks instead of holding the whole archive in memory
+            for chunk in response.iter_content(chunk_size=1024 * 1024):
+                f.write(chunk)
+    # Explicit paths avoid changing the working directory; check=True surfaces tar failures
+    subprocess.run(["tar", "xvzf", archive, "-C", output_directory], check=True)
+    return output_directory + "/D1"
+
+def download_VL(output_directory):
+    """Fetch the VetLIRN fish pathogen genomes into output_directory.
+
+    Each accession is saved as <friendly name>/<accession>.fasta by running
+    the ``efetch`` command, which must be on PATH. Returns output_directory
+    unchanged. Raises subprocess.CalledProcessError if efetch exits non-zero.
+    """
+    import subprocess  # function-scope import: not among the module-level imports
+
+    # FastANI uses folder names in output. Creating user friendly names
+    # for fish pathogens (each accession listed exactly once)
+    accdict = {'NZ_CP018680': 'Vibrio_harveyi_strain_QT520',
+      'SBIG01000001': 'Vibrio_alginolyticus_strain_LF_TCBS_15',
+      'NZ_CP018311': 'Vibrio_rotiferianus_strain_B64D1',
+      'NZ_CP032159': 'Staphylococcus_warneri_strain_22_1',
+      'NZ_CP090968': 'Edwardsiella_piscicida_strain_18EpOKYJ',
+      'NZ_CP044060': 'Aeromonas_veronii_strain_FDAARGOS_632',
+      'NZ_AP022254': 'Aeromonas_caviae_strain_WP8_S18_ESBL_04',
+      'NZ_CDBW01000001': 'Aeromonas_sobria_strain_CECT_4245'}
+    for acc, name in accdict.items():
+        folder = os.path.join(output_directory, name)
+        os.makedirs(folder, exist_ok=True)
+        with open(os.path.join(folder, acc + ".fasta"), "w") as fasta:
+            # Argument list instead of a shell string; check=True stops on a failed fetch
+            subprocess.run(["efetch", "-db", "nuccore", "-id", acc, "-format", "fasta"], stdout=fasta, check=True)
+    return output_directory
+
+
+def print_json(version, argspath, argsname, argsout):
+    """Write a single 'fastani' data table entry to argsout as JSON.
+
+    version, argsname and argspath become the entry's "value", "name" and
+    "path" fields; any existing content of argsout is replaced.
+    """
+    row = {
+        "value": version,
+        "name": argsname,
+        "path": argspath,
+    }
+    payload = {'data_tables': {'fastani': [row]}}
+
+    # Sorted keys and a two-space indent keep the written file stable and readable
+    with open(argsout, 'w') as out_handle:
+        json.dump(payload, out_handle, indent=2, sort_keys=True)
+        
+def main():
+    """Download the database selected by --type and write its data table entry.
+
+    --out names a JSON file that is first read for the target directory
+    (output_data[0].extra_files_path) and then overwritten with the entry.
+    """
+    parser = argparse.ArgumentParser(description='Download FastANI Databases')
+    # choices makes argparse reject unknown types up front instead of leaving `version` unset
+    parser.add_argument('--type', type=str, required=True, nargs=1, choices=['D1', 'VL'], help='Database Type')
+    parser.add_argument('--desc', type=str, required=True, nargs=1, help='Database Description')
+    parser.add_argument('--out', type=str, required=True, nargs=1, help='output file')
+    args = parser.parse_args()
+
+    with open(args.out[0]) as fh:
+        params = json.load(fh)
+    output_directory = params['output_data'][0]['extra_files_path']
+    os.makedirs(output_directory, exist_ok=True)  # tolerate a pre-existing or nested target
+
+    # Both database types start from D1; VL then adds the VetLIRN genomes inside the D1 folder
+    output_directory = download_D1(output_directory)
+    version = "FastANI D1"
+    if args.type[0] == "VL":
+        output_directory = download_VL(output_directory)
+        version = "FastANI D1 + VetLIRN"
+    print_json(version, output_directory, args.desc[0], args.out[0])
+
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager/data_manager_fastani.xml	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,33 @@
+<tool id="data_manager_fastani" name="FastANI Data Manager" tool_type="manage_data" version="0.0.1" profile="20.01">
+    <requirements>
+        <requirement type="package">requests</requirement>
+        <requirement type="package">entrez-direct</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/data_manager_fastani.py' --type '$input_source_selector' --desc '$desc' --out '$output_file'
+    ]]></command>
+    <inputs>
+        <!-- Top-level params so the command line can reference $input_source_selector and $desc directly -->
+        <param name="input_source_selector" type="select" label="Database Selection">
+            <option value="D1">FastANI D1</option>
+            <option value="VL">FastANI D1 + VetLIRN Fish Pathogens</option>
+        </param>
+        <param name="desc" type="text" label="Database Name"/>
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json"/>
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+     Downloads the FastANI D1 genome collection from
+     http://enve-omics.ce.gatech.edu/data/public_fastani/D1.tar.gz and, for the VetLIRN option,
+     additionally fetches a set of fish pathogen genomes with efetch (nuccore database).
+     The downloaded folder is registered in the "fastani" data table under the given Database Name.
+    </help>
+    <citations>
+        <citation type="doi">10.1038/s41467-018-07641-9</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/data_manager_conf.xml	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,19 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/data_manager_fastani.xml" id="data_manager_fastani">
+      <data_table name="fastani">
+        <output>
+          <column name="value" />  <!-- columns mirror the fastani data table: value, name, path -->
+          <column name="name" />
+          <column name="path" output_ref="output_file" >
+            <move type="directory" relativize_symlinks="True">
+              <src>${path}</src>
+              <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">fastani/${name}</target>
+            </move>
+            <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/fastani/${name}</value_translation>
+            <value_translation type="function">abspath</value_translation>
+          </column>
+        </output>
+      </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/test-data/fastani_databases.loc	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of fastani databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# D1	FastANI D1	/tmp/tool-data/fastani/D1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool-data/fastani_databases.loc	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,7 @@
+# this is a tab separated file describing the location of fastani databases
+#
+# the columns are:
+# value name path
+#
+# for example
+# D1	FastANI D1	/tmp/tool-data/fastani/D1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool_data_table_conf.xml.sample	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="fastani" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/fastani_databases.loc" />  <!-- must match the .loc file shipped in tool-data/ -->
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_fastani/tool_data_table_conf.xml.test	Sun May 29 19:01:15 2022 +0000
@@ -0,0 +1,6 @@
+<tables>
+    <table name="fastani" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="${__HERE__}/test-data/fastani_databases.loc" />  <!-- must match the .loc file shipped in test-data/ -->
+    </table>
+</tables>