view data_manager/cache_fetcher.py @ 3:acc6477faf1c draft default tip

planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty
author tduigou
date Tue, 05 Jul 2022 14:16:43 +0000
parents 35c33747b9e3
children

import argparse
import json
from os import (
    path as os_path,
    mkdir as os_mkdir
)
from tempfile import NamedTemporaryFile

from requests import get as r_get

def download(
    url: str,
    file: str = ""
) -> str:
    """
    Download a file from 'url' and save it as 'file'.
    Parameters:
    url  -- URL the file is downloaded from
    file -- (Optional) filename the downloaded file is saved into (default: "")
    Returns:
    A filename where the downloaded file has stored into
    """
    r = r_get(url)
    if not file:
        f = NamedTemporaryFile(
            mode='wb',
            delete=False
        )
        file = f.name
    else:
        f = open(file, 'wb')
    f.write(r.content)
    f.close()
    return file
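
# Example usage (illustrative only; 'cid_strc' is just one cache entry name):
#   path = download("https://gitlab.com/breakthewall/rrCache-data/-/raw/master/cid_strc.json.gz")
#   download("https://gitlab.com/breakthewall/rrCache-data/-/raw/master/cid_strc.json.gz", "cid_strc.json.gz")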

def download_entries(url, filename, workdir):
    full_filename = os_path.abspath(os_path.join(workdir, filename + ".json.gz"))

    download(url + filename + ".json.gz", full_filename)
    data_manager_entry = {
        "value": filename,
        "name": filename,
        "path": full_filename,
    }

    # Stay below 10 requests per second, as required by the host (http://bigg.ucsd.edu/data_access)
    #if ix % 5 == 0:
    #    time.sleep(1)
    yield data_manager_entry
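
# Each yielded entry becomes one row of the Galaxy "cache" data table filled in
# below, e.g. (values illustrative):
#   {"value": "cid_strc", "name": "cid_strc", "path": "<workdir>/cid_strc.json.gz"}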


parser = argparse.ArgumentParser(description="Download a cache file")
parser.add_argument('-f', '--filename', required=True, type=str, help="Cache filename to download")
parser.add_argument('-o', '--outfile', required=True, type=str, help="Galaxy data manager JSON file, read for job parameters and overwritten with the resulting data table entries")

args = parser.parse_args()

url= "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
filename= args.filename


data_manager_json = {"data_tables": {}}
with open(args.outfile) as fh:
    params = json.load(fh)

workdir = params["output_data"][0]["extra_files_path"]
os_mkdir(workdir)

#full_filename=os_path.join(workdir,filename) #'${GALAXY_DATA_MANAGER_DATA_PATH}'+'/rpextractsink/cache/cid_strc.json.gz'


entries = list(download_entries(url, filename, workdir))

data_manager_json["data_tables"]["cache"] = entries
with open(args.outfile, "w") as fh:
    json.dump(data_manager_json, fh, sort_keys=True)
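
# The outfile ends up with the data manager JSON layout, e.g.:
#   {"data_tables": {"cache": [{"name": ..., "path": ..., "value": ...}]}}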