Mercurial > repos > dchristiany > frogs_data_manager
diff data_manager/FROGS_data_manager.py @ 9:0d9cb5c5aa35 draft
planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
author | dchristiany |
---|---|
date | Mon, 07 Oct 2019 10:56:23 -0400 |
parents | 99b2dfb1fa1c |
children | 238a5328279d |
line wrap: on
line diff
--- a/data_manager/FROGS_data_manager.py Mon Oct 07 04:55:50 2019 -0400 +++ b/data_manager/FROGS_data_manager.py Mon Oct 07 10:56:23 2019 -0400 @@ -5,7 +5,8 @@ def get_args(): parser = argparse.ArgumentParser() parser.add_argument("-d","--database") - parser.add_argument("-r","--resource") + parser.add_argument("--custom_db") + parser.add_argument("--amplicons") parser.add_argument("-o","--output") args = parser.parse_args() return args @@ -16,7 +17,7 @@ data_manager_dict['data_tables'][data_table].append(data_table_entry) return data_manager_dict -def frogs_sources(data_manager_dict,target_directory): +def frogs_sources(data_manager_dict,target_directory,amplicons_list): #get frogs database index frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv" @@ -27,6 +28,10 @@ db_index = [line.split("\t") for line in db_index[1:]] db_index = [line[:4]+[line[1]+"_"+line[2]+"_"+line[3]]+[line[4]] for line in db_index] #add column name + #filter amplicons + if len(amplicons_list)!=0: + db_index = [line for line in db_index if line[4] in amplicons_list] + #get frogs dbs os.chdir(target_directory) dir_name="frogs_db_"+time.strftime("%Y%m%d") @@ -61,14 +66,34 @@ data_table_entry = dict(name = name, value = value, path=path) _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db") -#def HVL_sources(resource): +def HVL_sources(data_manager_dict,target_directory): + + #get phiX files + os.chdir(target_directory) + for link in ["http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS1.fasta","http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS2.fasta"]: + file_name=link.split("/")[-1].replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta") + dl_file = urllib.URLopener() + dl_file.retrieve(link,file_name) -#def phiX_sources(resource): + #get fasta file path + path = os.path.join(target_directory,file_name) + if link.endswith('ITS1.fasta'): + name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d") + elif link.endswith('ITS2.fasta'): + name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d") + value=file_name.replace('.fasta','') + + data_table_entry = dict(name = name, value = value, path=path) + _add_data_table_entry(data_manager_dict, data_table_entry, "HVL_db") def main(): #get args from command line args = get_args() + if args.database=="frogs_db_data" and args.custom_db=="true": + amplicons_list = args.amplicons.split(",") + else : + amplicons_list = [] # Extract json file params data_manager_dict = {} @@ -78,11 +103,9 @@ os.mkdir(target_directory) if args.database=="frogs_db_data": - frogs_sources(data_manager_dict,target_directory) + frogs_sources(data_manager_dict,target_directory,amplicons_list) elif args.database=="HVL_db_data": - HVL_sources(args.resource) - elif args.database=="phiX_db_data": - phiX_sources(args.resource) + HVL_sources(data_manager_dict,target_directory) #save info to json file open(filename, 'wb').write(to_json_string(data_manager_dict))