comparison data_manager/FROGS_data_manager.py @ 9:0d9cb5c5aa35 draft

planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
author dchristiany
date Mon, 07 Oct 2019 10:56:23 -0400
parents 99b2dfb1fa1c
children 238a5328279d
comparison
equal deleted inserted replaced
8:cac51639c40f 9:0d9cb5c5aa35
3 import os, sys, argparse, time, json, requests, urllib, tarfile 3 import os, sys, argparse, time, json, requests, urllib, tarfile
4 4
5 def get_args(): 5 def get_args():
6 parser = argparse.ArgumentParser() 6 parser = argparse.ArgumentParser()
7 parser.add_argument("-d","--database") 7 parser.add_argument("-d","--database")
8 parser.add_argument("-r","--resource") 8 parser.add_argument("--custom_db")
9 parser.add_argument("--amplicons")
9 parser.add_argument("-o","--output") 10 parser.add_argument("-o","--output")
10 args = parser.parse_args() 11 args = parser.parse_args()
11 return args 12 return args
12 13
13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): 14 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) 15 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) 16 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
16 data_manager_dict['data_tables'][data_table].append(data_table_entry) 17 data_manager_dict['data_tables'][data_table].append(data_table_entry)
17 return data_manager_dict 18 return data_manager_dict
18 19
19 def frogs_sources(data_manager_dict,target_directory): 20 def frogs_sources(data_manager_dict,target_directory,amplicons_list):
20 21
21 #get frogs database index 22 #get frogs database index
22 frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv" 23 frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
23 with requests.Session() as s: 24 with requests.Session() as s:
24 download = s.get(frogs_db_index_link) 25 download = s.get(frogs_db_index_link)
25 decoded_content = download.content.decode('utf-8') 26 decoded_content = download.content.decode('utf-8')
26 db_index = download.content.splitlines() 27 db_index = download.content.splitlines()
27 db_index = [line.split("\t") for line in db_index[1:]] 28 db_index = [line.split("\t") for line in db_index[1:]]
28 db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index] #add column name 29 db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index] #add column name
30
31 #filter amplicons
32 if len(amplicons_list)!=0:
33 db_index = [line for line in db_index if line[4] in amplicons_list]
29 34
30 #get frogs dbs 35 #get frogs dbs
31 os.chdir(target_directory) 36 os.chdir(target_directory)
32 dir_name="frogs_db_"+time.strftime("%Y%m%d") 37 dir_name="frogs_db_"+time.strftime("%Y%m%d")
33 os.mkdir(dir_name) 38 os.mkdir(dir_name)
59 path = os.path.join(target_directory,path) 64 path = os.path.join(target_directory,path)
60 65
61 data_table_entry = dict(name = name, value = value, path=path) 66 data_table_entry = dict(name = name, value = value, path=path)
62 _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db") 67 _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
63 68
64 #def HVL_sources(resource): 69 def HVL_sources(data_manager_dict,target_directory):
65 70
66 #def phiX_sources(resource): 71 #get phiX files
72 os.chdir(target_directory)
73 for link in ["http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS1.fasta","http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS2.fasta"]:
74 file_name=link.split("/")[-1].replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta")
75 dl_file = urllib.URLopener()
76 dl_file.retrieve(link,file_name)
77
78 #get fasta file path
79 path = os.path.join(target_directory,file_name)
80 if link.endswith('ITS1.fasta'):
81 name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d")
82 elif link.endswith('ITS2.fasta'):
83 name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d")
84 value=file_name.replace('.fasta','')
85
86 data_table_entry = dict(name = name, value = value, path=path)
87 _add_data_table_entry(data_manager_dict, data_table_entry, "HVL_db")
67 88
68 def main(): 89 def main():
69 90
70 #get args from command line 91 #get args from command line
71 args = get_args() 92 args = get_args()
93 if args.database=="frogs_db_data" and args.custom_db=="true":
94 amplicons_list = args.amplicons.split(",")
95 else :
96 amplicons_list = []
72 97
73 # Extract json file params 98 # Extract json file params
74 data_manager_dict = {} 99 data_manager_dict = {}
75 filename = args.output 100 filename = args.output
76 params = from_json_string(open(filename).read()) 101 params = from_json_string(open(filename).read())
77 target_directory = params[ 'output_data' ][0]['extra_files_path'] 102 target_directory = params[ 'output_data' ][0]['extra_files_path']
78 os.mkdir(target_directory) 103 os.mkdir(target_directory)
79 104
80 if args.database=="frogs_db_data": 105 if args.database=="frogs_db_data":
81 frogs_sources(data_manager_dict,target_directory) 106 frogs_sources(data_manager_dict,target_directory,amplicons_list)
82 elif args.database=="HVL_db_data": 107 elif args.database=="HVL_db_data":
83 HVL_sources(args.resource) 108 HVL_sources(data_manager_dict,target_directory)
84 elif args.database=="phiX_db_data":
85 phiX_sources(args.resource)
86 109
87 #save info to json file 110 #save info to json file
88 open(filename, 'wb').write(to_json_string(data_manager_dict)) 111 open(filename, 'wb').write(to_json_string(data_manager_dict))
89 112
90 if __name__ == "__main__": 113 if __name__ == "__main__":