Mercurial > repos > tduigou > data_manager_cache
annotate data_manager/cache_fetcher.py @ 3:acc6477faf1c draft default tip
planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty
| author | tduigou |
|---|---|
| date | Tue, 05 Jul 2022 14:16:43 +0000 |
| parents | 35c33747b9e3 |
| children |
| rev | line source |
|---|---|
|
0
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
1 from os import ( |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
2 path as os_path, |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
3 mkdir as os_mkdir |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
4 ) |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
5 |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
6 from requests import get as r_get |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
7 |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
8 from tempfile import ( |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
9 NamedTemporaryFile |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
10 ) |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
11 |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
12 import argparse |
|
1
35c33747b9e3
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
0
diff
changeset
|
13 import json |
|
0
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
14 |
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
def download(
    url: str,
    file: str = ""
) -> str:
    """
    Download a file from 'url' and save it as 'file'.

    Parameters:
    url  -- URL the file is downloaded from
    file -- (Optional) filename the downloaded file is saved into; when
            empty, a named temporary file is created and kept on disk
            (default: "")

    Returns:
    The filename the downloaded content has been stored into

    Raises:
    requests.HTTPError -- if the server answers with a 4xx/5xx status
    """
    r = r_get(url)
    # Fail loudly on HTTP errors rather than saving an error page as if it
    # were the requested file. Done before opening the target so that a
    # failed download does not leave an empty file behind.
    r.raise_for_status()
    if not file:
        # delete=False: the caller receives the name and owns the file.
        f = NamedTemporaryFile(
            mode='wb',
            delete=False
        )
        file = f.name
    else:
        f = open(file, 'wb')
    # The context manager closes the handle even if the write fails.
    with f:
        f.write(r.content)
    return file
|
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
39 |
|
1
35c33747b9e3
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
0
diff
changeset
|
def download_entries(url, filename, workdir):
    """
    Download '<filename>.json.gz' into 'workdir' and yield its data table entry.

    Parameters:
    url      -- base URL; the archive name is appended to it as-is, so it
                has to end with a path separator
    filename -- cache name without the '.json.gz' suffix
    workdir  -- directory the archive is written into

    Yields:
    A single dict with the keys 'value', 'name' and 'path', where 'path'
    is the absolute path of the downloaded archive
    """
    archive = filename + ".json.gz"
    target = os_path.abspath(os_path.join(workdir, archive))

    download(url + archive, target)

    yield {
        "value": filename,
        "name": filename,
        "path": target,
    }
|
35c33747b9e3
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
0
diff
changeset
|
53 |
|
35c33747b9e3
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
0
diff
changeset
|
54 |
|
0
e0b92d203870
"planemo upload commit f40274f6b9f6a15eb4022aab21286d4c96cd8475-dirty"
tduigou
parents:
diff
changeset
|
# Command-line entry point: fetch one cache file and describe it in the
# JSON file given by --outfile.
parser = argparse.ArgumentParser(description="Download a cache file")
parser.add_argument('-f', '--filename', required=True, default=None, type=str, help="Cache filename to download")
parser.add_argument('-o', '--outfile', required=True, default=None, type=str, help="JSON file that is read for the job parameters and then overwritten with the data table entries")

args = parser.parse_args()

url = "https://gitlab.com/breakthewall/rrCache-data/-/raw/master/"
filename = args.filename

data_manager_json = {"data_tables": {}}
# The output file initially holds the job parameters; it is read here and
# overwritten with the result at the end of the script.
with open(args.outfile) as fh:
    params = json.load(fh)

workdir = params["output_data"][0]["extra_files_path"]
try:
    os_mkdir(workdir)
except FileExistsError:
    # An already existing directory (e.g. on a re-run) is fine; anything
    # else occupying that path is still an error.
    if not os_path.isdir(workdir):
        raise

entries = list(download_entries(url, filename, workdir))

data_manager_json["data_tables"]["cache"] = entries
with open(args.outfile, "w") as fh:
    json.dump(data_manager_json, fh, sort_keys=True)
