annotate data_manager/cache_fetcher.py @ 3:acc6477faf1c draft default tip

planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty
author tduigou
date Tue, 05 Jul 2022 14:16:43 +0000
parents 35c33747b9e3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
1 from os import (
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
2 path as os_path,
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
3 mkdir as os_mkdir
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
4 )
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
5
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
6 from requests import get as r_get
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
7
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
8 from tempfile import (
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
9 NamedTemporaryFile
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
10 )
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
11
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
12 import argparse
1
35c33747b9e3 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents: 0
diff changeset
13 import json
0
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
14
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
def download(
    url: str,
    file: str = ""
) -> str:
    """
    Download a file from 'url' and save it as 'file'.

    Parameters:
    url  -- URL the file is downloaded from
    file -- (Optional) filename the downloaded file is saved into; when
            empty, a temporary file that is kept on disk after closing
            is created instead (default: "")

    Returns:
    The filename the downloaded content has been stored into

    Raises:
    requests.HTTPError -- if the server answers with a 4xx/5xx status
    """
    r = r_get(url)
    # Fail loudly on an HTTP error instead of saving the error page as if
    # it were the requested file. Checked before the destination is
    # opened so that no empty file is left behind.
    r.raise_for_status()
    if file:
        f = open(file, 'wb')
    else:
        f = NamedTemporaryFile(
            mode='wb',
            delete=False
        )
        file = f.name
    # The context manager closes the handle even if the write fails.
    with f:
        f.write(r.content)
    return file
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
39
1
35c33747b9e3 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents: 0
diff changeset
def download_entries(url, filename, workdir):
    """
    Download the cache file '<filename>.json.gz' into 'workdir' and yield
    the matching data table entry.

    This is a generator: the download only happens once it is iterated.

    Parameters:
    url      -- base URL; '<filename>.json.gz' is appended to it as-is
    filename -- name of the cache file, without the '.json.gz' extension
    workdir  -- directory the downloaded file is saved into

    Yields:
    A single dict with the keys "value" and "name" (both set to
    'filename') and "path" (absolute path of the downloaded file)
    """
    archive_name = filename + ".json.gz"
    full_filename = os_path.abspath(os_path.join(workdir, archive_name))

    download(url + archive_name, full_filename)

    yield {
        "value": filename,
        "name": filename,
        "path": full_filename,
    }
35c33747b9e3 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents: 0
diff changeset
53
35c33747b9e3 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents: 0
diff changeset
54
0
e0b92d203870 "planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff changeset
parser = argparse.ArgumentParser(description="Download a cache file")
parser.add_argument(
    '-f', '--filename',
    required=True,
    type=str,
    help="Cache filename to download"
)
parser.add_argument(
    '-o', '--outfile',
    required=True,
    type=str,
    help="JSON file holding the job parameters; it is overwritten with the data table entries"
)

args = parser.parse_args()

url = "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
filename = args.filename

# The output file is first read as JSON to find the directory the
# downloaded file has to be stored into.
with open(args.outfile) as fh:
    params = json.load(fh)

workdir = params["output_data"][0]["extra_files_path"]
try:
    os_mkdir(workdir)
except FileExistsError:
    # The directory is already there (e.g. the script is run again):
    # reuse it rather than abort.
    pass

entries = list(download_entries(url, filename, workdir))

# The same file is then overwritten with the entries of the "cache"
# data table.
data_manager_json = {"data_tables": {"cache": entries}}
with open(args.outfile, "w") as fh:
    json.dump(data_manager_json, fh, sort_keys=True)