diff data_manager/FROGS_data_manager.py @ 9:0d9cb5c5aa35 draft

planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
author dchristiany
date Mon, 07 Oct 2019 10:56:23 -0400
parents 99b2dfb1fa1c
children 238a5328279d
line wrap: on
line diff
--- a/data_manager/FROGS_data_manager.py	Mon Oct 07 04:55:50 2019 -0400
+++ b/data_manager/FROGS_data_manager.py	Mon Oct 07 10:56:23 2019 -0400
@@ -5,7 +5,8 @@
 def get_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("-d","--database")
-    parser.add_argument("-r","--resource")
+    parser.add_argument("--custom_db")
+    parser.add_argument("--amplicons")
     parser.add_argument("-o","--output")
     args = parser.parse_args()
     return args
@@ -16,7 +17,7 @@
     data_manager_dict['data_tables'][data_table].append(data_table_entry)
     return data_manager_dict
 
-def frogs_sources(data_manager_dict,target_directory):
+def frogs_sources(data_manager_dict,target_directory,amplicons_list):
 
     #get frogs database index
     frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
@@ -27,6 +28,10 @@
         db_index = [line.split("\t") for line in db_index[1:]]
         db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index]  #add column name
 
+    #filter amplicons
+    if len(amplicons_list)!=0:
+        db_index = [line for line in db_index if line[4] in amplicons_list]
+
     #get frogs dbs
     os.chdir(target_directory)
     dir_name="frogs_db_"+time.strftime("%Y%m%d")
@@ -61,14 +66,34 @@
         data_table_entry = dict(name = name, value = value, path=path)
         _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
 
-#def HVL_sources(resource):
+def HVL_sources(data_manager_dict,target_directory):
+
+    #get HVL reference fasta files (UNITE 7.1 ITS1 and ITS2)
+    os.chdir(target_directory)
+    for link in ["http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS1.fasta","http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS2.fasta"]:
+        file_name=link.split("/")[-1].replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta")
+        dl_file = urllib.URLopener()
+        dl_file.retrieve(link,file_name)
 
-#def phiX_sources(resource):
+        #get fasta file path
+        path = os.path.join(target_directory,file_name)
+        if link.endswith('ITS1.fasta'):
+            name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d")
+        elif link.endswith('ITS2.fasta'):
+            name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d")
+        value=file_name.replace('.fasta','')
+
+        data_table_entry = dict(name = name, value = value, path=path)
+        _add_data_table_entry(data_manager_dict, data_table_entry, "HVL_db")
 
 def main():
 
     #get args from command line
     args = get_args()
+    if args.database=="frogs_db_data" and args.custom_db=="true":
+        amplicons_list = args.amplicons.split(",")
+    else :
+        amplicons_list = []
 
     # Extract json file params
     data_manager_dict = {}
@@ -78,11 +103,9 @@
     os.mkdir(target_directory)
 
     if args.database=="frogs_db_data":
-        frogs_sources(data_manager_dict,target_directory)
+        frogs_sources(data_manager_dict,target_directory,amplicons_list)
     elif args.database=="HVL_db_data":
-        HVL_sources(args.resource)
-    elif args.database=="phiX_db_data":
-        phiX_sources(args.resource)
+        HVL_sources(data_manager_dict,target_directory)
 
     #save info to json file
     open(filename, 'wb').write(to_json_string(data_manager_dict))