Mercurial > repos > tduigou > data_manager_cache
view data_manager/cache_fetcher.py @ 3:acc6477faf1c draft default tip
planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty
| author | tduigou |
|---|---|
| date | Tue, 05 Jul 2022 14:16:43 +0000 |
| parents | 35c33747b9e3 |
| children |
line wrap: on
line source
"""Galaxy data manager: download a cache file from the rrCache-data repository.

Reads the data-manager JSON config (``--outfile``), downloads the requested
``<filename>.json.gz`` into the manager's ``extra_files_path`` working
directory, and rewrites the JSON with a "cache" data-table entry describing
the downloaded file.
"""
from os import (
    path as os_path,
    mkdir as os_mkdir,       # kept for backward compatibility
    makedirs as os_makedirs,
)
from requests import get as r_get
from tempfile import NamedTemporaryFile
import argparse
import json


def download(
    url: str,
    file: str = ""
) -> str:
    """
    Download a file from 'url' and save it as 'file'.

    Parameters:
        url -- URL the file is downloaded from
        file -- (Optional) filename the downloaded file is saved into;
                when empty, a persistent temporary file is created (default: "")

    Returns:
        The filename where the downloaded file has been stored.

    Raises:
        requests.HTTPError -- if the server answers with an error status
                              (previously the error body was silently saved).
    """
    r = r_get(url)
    # Fail loudly on HTTP errors instead of writing an error page to disk
    # and letting Galaxy register it as valid cache data.
    r.raise_for_status()
    if not file:
        # delete=False: the file must survive the handle being closed so the
        # caller can use the returned path.
        with NamedTemporaryFile(mode='wb', delete=False) as f:
            file = f.name
            f.write(r.content)
    else:
        # Context manager guarantees the handle is closed even if write fails.
        with open(file, 'wb') as f:
            f.write(r.content)
    return file


def download_entries(url, filename, workdir):
    """
    Download one cache file and yield its Galaxy data-table entry.

    Parameters:
        url -- base URL of the raw-file repository (with trailing slash)
        filename -- cache basename, without the '.json.gz' suffix
        workdir -- directory the file is downloaded into

    Yields:
        dict with 'value', 'name' and 'path' keys, as expected by the
        Galaxy 'cache' data table.
    """
    full_filename = os_path.abspath(
        os_path.join(workdir, filename + ".json.gz")
    )
    download(url + filename + ".json.gz", full_filename)
    # NOTE: the upstream host (http://bigg.ucsd.edu/data_access) requires
    # fewer than 10 requests per second; throttle here if this generator is
    # ever extended to yield multiple entries.
    yield {
        "value": filename,
        "name": filename,
        "path": full_filename,
    }


def main():
    """Parse CLI arguments, download the cache file, rewrite the config JSON."""
    parser = argparse.ArgumentParser(description="Download a cache file")
    parser.add_argument('-f', '--filename', required=True, type=str,
                        help="Cache filename to download")
    parser.add_argument('-o', '--outfile', required=True, type=str,
                        help="A filename where the downloaded file has stored into")
    args = parser.parse_args()

    url = "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"

    # The outfile initially holds the Galaxy-provided parameters; it is
    # overwritten below with the data-table JSON.
    with open(args.outfile) as fh:
        params = json.load(fh)
    workdir = params["output_data"][0]["extra_files_path"]
    # exist_ok avoids crashing on a re-run when the directory already exists
    # (plain mkdir raised FileExistsError).
    os_makedirs(workdir, exist_ok=True)

    data_manager_json = {"data_tables": {}}
    entries = list(download_entries(url, args.filename, workdir))
    data_manager_json["data_tables"]["cache"] = entries
    with open(args.outfile, "w") as fh:
        json.dump(data_manager_json, fh, sort_keys=True)


# Guard so importing this module (e.g. for testing) triggers no argparse
# exit or network I/O.
if __name__ == "__main__":
    main()