Mercurial > repos > dchristiany > frogs_data_manager
comparison data_manager/FROGS_data_manager.py @ 9:0d9cb5c5aa35 draft
planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
| author | dchristiany |
|---|---|
| date | Mon, 07 Oct 2019 10:56:23 -0400 |
| parents | 99b2dfb1fa1c |
| children | 238a5328279d |
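Comparison legend (row types used in the table below):
- equal
- deleted
- inserted
- replaced

Note: in the table, a row with only its first cell filled is a line present in just one of the two revisions (an inserted or a deleted line); the line number in that cell follows the numbering of the revision it belongs to.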
| 8:cac51639c40f | 9:0d9cb5c5aa35 |
|---|---|
| 3 import os, sys, argparse, time, json, requests, urllib, tarfile | 3 import os, sys, argparse, time, json, requests, urllib, tarfile |
| 4 | 4 |
| 5 def get_args(): | 5 def get_args(): |
| 6 parser = argparse.ArgumentParser() | 6 parser = argparse.ArgumentParser() |
| 7 parser.add_argument("-d","--database") | 7 parser.add_argument("-d","--database") |
| 8 parser.add_argument("-r","--resource") | 8 parser.add_argument("--custom_db") |
| 9 parser.add_argument("--amplicons") | |
| 9 parser.add_argument("-o","--output") | 10 parser.add_argument("-o","--output") |
| 10 args = parser.parse_args() | 11 args = parser.parse_args() |
| 11 return args | 12 return args |
| 12 | 13 |
| 13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): | 14 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): |
| 14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | 15 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) |
| 15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) | 16 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) |
| 16 data_manager_dict['data_tables'][data_table].append(data_table_entry) | 17 data_manager_dict['data_tables'][data_table].append(data_table_entry) |
| 17 return data_manager_dict | 18 return data_manager_dict |
| 18 | 19 |
| 19 def frogs_sources(data_manager_dict,target_directory): | 20 def frogs_sources(data_manager_dict,target_directory,amplicons_list): |
| 20 | 21 |
| 21 #get frogs database index | 22 #get frogs database index |
| 22 frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv" | 23 frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv" |
| 23 with requests.Session() as s: | 24 with requests.Session() as s: |
| 24 download = s.get(frogs_db_index_link) | 25 download = s.get(frogs_db_index_link) |
| 25 decoded_content = download.content.decode('utf-8') | 26 decoded_content = download.content.decode('utf-8') |
| 26 db_index = download.content.splitlines() | 27 db_index = download.content.splitlines() |
| 27 db_index = [line.split("\t") for line in db_index[1:]] | 28 db_index = [line.split("\t") for line in db_index[1:]] |
| 28 db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index] #add column name | 29 db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index] #add column name |
| 30 | |
| 31 #filter amplicons | |
| 32 if len(amplicons_list)!=0: | |
| 33 db_index = [line for line in db_index if line[4] in amplicons_list] | |
| 29 | 34 |
| 30 #get frogs dbs | 35 #get frogs dbs |
| 31 os.chdir(target_directory) | 36 os.chdir(target_directory) |
| 32 dir_name="frogs_db_"+time.strftime("%Y%m%d") | 37 dir_name="frogs_db_"+time.strftime("%Y%m%d") |
| 33 os.mkdir(dir_name) | 38 os.mkdir(dir_name) |
| 59 path = os.path.join(target_directory,path) | 64 path = os.path.join(target_directory,path) |
| 60 | 65 |
| 61 data_table_entry = dict(name = name, value = value, path=path) | 66 data_table_entry = dict(name = name, value = value, path=path) |
| 62 _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db") | 67 _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db") |
| 63 | 68 |
| 64 #def HVL_sources(resource): | 69 def HVL_sources(data_manager_dict,target_directory): |
| 65 | 70 |
| 66 #def phiX_sources(resource): | 71 #get phiX files |
| 72 os.chdir(target_directory) | |
| 73 for link in ["http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS1.fasta","http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS2.fasta"]: | |
| 74 file_name=link.split("/")[-1].replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta") | |
| 75 dl_file = urllib.URLopener() | |
| 76 dl_file.retrieve(link,file_name) | |
| 77 | |
| 78 #get fasta file path | |
| 79 path = os.path.join(target_directory,file_name) | |
| 80 if link.endswith('ITS1.fasta'): | |
| 81 name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d") | |
| 82 elif link.endswith('ITS2.fasta'): | |
| 83 name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d") | |
| 84 value=file_name.replace('.fasta','') | |
| 85 | |
| 86 data_table_entry = dict(name = name, value = value, path=path) | |
| 87 _add_data_table_entry(data_manager_dict, data_table_entry, "HVL_db") | |
| 67 | 88 |
| 68 def main(): | 89 def main(): |
| 69 | 90 |
| 70 #get args from command line | 91 #get args from command line |
| 71 args = get_args() | 92 args = get_args() |
| 93 if args.database=="frogs_db_data" and args.custom_db=="true": | |
| 94 amplicons_list = args.amplicons.split(",") | |
| 95 else : | |
| 96 amplicons_list = [] | |
| 72 | 97 |
| 73 # Extract json file params | 98 # Extract json file params |
| 74 data_manager_dict = {} | 99 data_manager_dict = {} |
| 75 filename = args.output | 100 filename = args.output |
| 76 params = from_json_string(open(filename).read()) | 101 params = from_json_string(open(filename).read()) |
| 77 target_directory = params[ 'output_data' ][0]['extra_files_path'] | 102 target_directory = params[ 'output_data' ][0]['extra_files_path'] |
| 78 os.mkdir(target_directory) | 103 os.mkdir(target_directory) |
| 79 | 104 |
| 80 if args.database=="frogs_db_data": | 105 if args.database=="frogs_db_data": |
| 81 frogs_sources(data_manager_dict,target_directory) | 106 frogs_sources(data_manager_dict,target_directory,amplicons_list) |
| 82 elif args.database=="HVL_db_data": | 107 elif args.database=="HVL_db_data": |
| 83 HVL_sources(args.resource) | 108 HVL_sources(data_manager_dict,target_directory) |
| 84 elif args.database=="phiX_db_data": | |
| 85 phiX_sources(args.resource) | |
| 86 | 109 |
| 87 #save info to json file | 110 #save info to json file |
| 88 open(filename, 'wb').write(to_json_string(data_manager_dict)) | 111 open(filename, 'wb').write(to_json_string(data_manager_dict)) |
| 89 | 112 |
| 90 if __name__ == "__main__": | 113 if __name__ == "__main__": |
