Mercurial > repos > dchristiany > frogs_data_manager
comparison data_manager/FROGS_data_manager.py @ 0:7caea40b2a30 draft
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
| author | dchristiany |
|---|---|
| date | Wed, 02 Oct 2019 11:00:08 -0400 |
| parents | |
| children | 95d5191f1dc4 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7caea40b2a30 |
|---|---|
| 1 # -*- coding: utf-8 -*- | |
| 2 from galaxy.util.json import from_json_string, to_json_string | |
| 3 import os, sys, argparse, time, json, requests, urllib | |
| 4 | |
def get_args():
    """Parse the command-line options of the data manager script.

    Three optional string options are accepted, each with a short and a
    long spelling: ``-d/--database``, ``-r/--resource`` and ``-o/--output``.

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``; an option
        that was not supplied is ``None``.
    """
    cli = argparse.ArgumentParser()
    option_flags = (
        ("-d", "--database"),
        ("-r", "--resource"),
        ("-o", "--output"),
    )
    for short_flag, long_flag in option_flags:
        cli.add_argument(short_flag, long_flag)
    return cli.parse_args()
| 12 | |
| 13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): | |
| 14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | |
| 15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) | |
| 16 data_manager_dict['data_tables'][data_table].append(data_table_entry) | |
| 17 return data_manager_dict | |
| 18 | |
def frogs_sources(data_manager_dict, target_directory):
    """Download FROGS databases and register them in the ``frogs_db`` table.

    The tab-separated FROGS index is fetched, its header line is dropped,
    and for each processed row the linked ``.tar.gz`` archive is downloaded
    and unpacked under ``<target_directory>/frogs_db_<YYYYMMDD>/``.  One
    ``frogs_db`` entry (``name``, ``value``, ``path``) is added to
    ``data_manager_dict`` per archive.

    Args:
        data_manager_dict: dictionary updated in place through
            ``_add_data_table_entry``.
        target_directory: directory under which the databases are stored;
            the registered ``path`` values point inside it.

    Returns:
        None.

    Raises:
        requests.HTTPError: when the index or an archive download answers
            with an HTTP error status.
        OSError: when the dated database directory already exists.
    """
    # Imported here because the module-level import line does not provide it.
    import tarfile

    # get frogs database index
    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as s:
        download = s.get(frogs_db_index_link)
        download.raise_for_status()
        # Split the decoded text (not the raw bytes) so that the rows are
        # text strings on both Python 2 and Python 3.
        decoded_content = download.content.decode('utf-8')
        rows = [line.split("\t") for line in decoded_content.splitlines()[1:] if line.strip()]
        # add column name: columns 1, 2 and 1 again joined by "_" are
        # inserted before the last kept column.
        # NOTE(review): presumably column 4 holds the archive URL - confirm
        # against the index file.
        db_index = [row[:4] + [row[1] + "_" + row[2] + "_" + row[1]] + [row[4]] for row in rows]

        # get frogs dbs
        # Extract directly below target_directory so that the registered
        # paths point at files that really exist.
        dir_name = "frogs_db_" + time.strftime("%Y%m%d")
        db_root = os.path.join(target_directory, dir_name)
        os.makedirs(db_root)
        archive = os.path.join(target_directory, "tmp.tar.gz")
        dbs = set()

        # NOTE(review): only the first two index rows are processed, as in
        # the original code - looks like a testing limit; confirm.
        for row in db_index[:2]:
            value = row[4]
            name = value.replace("_", " ")
            link = row[5]

            try:
                # download frogs db
                response = s.get(link, stream=True)
                response.raise_for_status()
                with open(archive, "wb") as handle:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        handle.write(chunk)

                # unzip frogs db
                # NOTE(review): extractall trusts the member paths stored
                # in the downloaded archive.
                with tarfile.open(archive) as tar:
                    tar.extractall(db_root)
            finally:
                # Never leave the temporary archive behind, even on failure.
                if os.path.exists(archive):
                    os.remove(archive)

            # get fasta file path
            # Assumes each archive unpacks to exactly one new top-level
            # directory holding exactly one .fasta file - TODO confirm.
            current = set(os.listdir(db_root))
            new_db = os.path.join(db_root, "".join(current.difference(dbs)))
            fasta = "".join([entry for entry in os.listdir(new_db) if entry.endswith('.fasta')])
            dbs = current
            path = os.path.join(new_db, fasta)

            data_table_entry = dict(name=name, value=value, path=path)
            _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
| 62 | |
| 63 #def HVL_sources(resource): | |
| 64 | |
| 65 #def phiX_sources(resource): | |
| 66 | |
def main():
    """Run the data manager: read the parameters, fetch data, write results.

    The file given by ``--output`` is first read as JSON to obtain the
    target directory (``output_data[0].extra_files_path``).  The source
    selected with ``--database`` is then processed, and the collected data
    table entries are written back to that same file as JSON.

    Raises:
        NotImplementedError: when ``--database`` selects ``HVL_db_data`` or
            ``phiX_db_data``, whose handlers are not defined in this file.
    """
    # get args from command line
    args = get_args()

    # Extract json file params
    data_manager_dict = {}
    filename = args.output
    with open(filename) as handle:
        params = from_json_string(handle.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Create missing parents too and tolerate an already existing directory.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    if args.database == "frogs_db_data":
        frogs_sources(data_manager_dict, target_directory)
    elif args.database in ("HVL_db_data", "phiX_db_data"):
        # The matching handlers only exist as commented-out stubs; fail
        # with an explicit message instead of a NameError.
        raise NotImplementedError(
            "database source '%s' is not implemented yet" % args.database
        )

    # save info to json file: without this the entries collected in
    # data_manager_dict are lost when the script ends.
    with open(filename, 'w') as handle:
        handle.write(to_json_string(data_manager_dict))


if __name__ == "__main__":
    main()
