annotate TrustStoreGalaxyBrowse.py @ 2:3ff3e9b8794f

Fix output json format.
author Catherine Wise <catherine.wise@csiro.au>
date Thu, 21 May 2015 14:21:03 +1000
parents 07fda3da679b
children 34bdad74ec64
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
1 """TrustStore downloaded for Galaxy."""
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
2 from __future__ import division, absolute_import, print_function, unicode_literals
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
3 import sys
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
4 import shutil
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
5 import gzip
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
6 import tempfile
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
7 import os
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
8 import json
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
9 import operator
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
10 import urlparse
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
11 from py_ts import TrustStoreClient, utils
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
12 from galaxy.datatypes.checkers import util
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
13
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
14 # Tell urllib3 to use pyOpenSSL because we are on old Python stdlib.
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
15 # import urllib3.contrib.pyopenssl
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
16 # urllib3.contrib.pyopenssl.inject_into_urllib3()
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
17 #
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
# Set the CA bundle path in the environment before any HTTPS request is made
# (presumably consumed by the requests library — TODO confirm).
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client credentials handed to TrustStoreClient.Config in main().
# NOTE(review): the client secret is hard-coded in source control; consider
# loading it from configuration or the environment instead.
CLIENT_KEY = "desktop"
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
# Number of bytes read per iteration while decompressing in ungzip() (1 MiB).
CHUNK_SIZE = 2**20 # 1Mb
# Characters kept verbatim when main() sanitises a remote file name; every
# other character is replaced with '-'.
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
24
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def print_nice(elem, f, depth):
    """Write an indented listing of ``elem`` and its descendants to ``f``.

    Every node goes on its own line, prefixed by ``depth`` tab characters.
    A node that exposes ``fragments`` gets a " (N parts)" suffix; a node
    without that attribute is written by name only.  Children are written
    recursively, one level deeper.
    """
    indent = '\t' * depth
    try:
        part_count = str(len(elem.fragments))
        f.write(indent + elem.name + " (" + part_count + " parts)\n")
    except AttributeError:
        # No ``fragments`` attribute on this node: fall back to the bare name.
        f.write(indent + elem.name + "\n")
    for node in elem.children:
        print_nice(node, f, depth + 1)
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
33
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def check_gzip(file_path):
    """Report whether the file at ``file_path`` starts with the gzip magic.

    Returns:
        A ``(is_gzipped, is_valid)`` tuple.  Both values are True when the
        first two bytes of the file equal ``util.gzip_magic``; both are
        False otherwise, including when the file cannot be opened or read.
    """
    try:
        # Read in binary mode: the magic number is raw bytes, and the "U"
        # mode flag used previously is rejected by newer Python versions.
        # The context manager also closes the handle if read() fails.
        with open(file_path, "rb") as handle:
            magic_check = handle.read(2)
    except (IOError, OSError):
        return (False, False)

    magic = util.gzip_magic
    if not isinstance(magic, bytes):
        # Tolerate a text-typed magic constant by comparing on raw bytes.
        magic = magic.encode("latin-1")

    if magic_check != magic:
        return (False, False)
    return (True, True)
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
45
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def ungzip(download, outputFile):
    """Copy ``download`` to ``outputFile``, gunzipping it first if needed.

    A file that ``check_gzip`` reports as gzipped is decompressed in
    ``CHUNK_SIZE`` pieces into a temporary file created next to
    ``outputFile``; that temporary file is then copied to ``outputFile`` and
    both the temporary file and ``download`` are removed (best effort).
    Any other file is copied to ``outputFile`` unchanged and ``download`` is
    left in place.

    Exits the process with status 4 when the data is flagged as gzipped but
    invalid, or when decompression fails part-way through.
    """
    is_gzipped, is_valid = check_gzip(download)

    if is_gzipped and not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)

    if not is_gzipped:
        shutil.copy(download, outputFile)
        return

    # NOTE(review): the original comment said BAM files must stay compressed
    # (BGZF), but nothing here special-cases them - confirm this is intended.
    failed = False
    gzipped_file = gzip.GzipFile(download, 'rb')
    try:
        file_handle, uncompressed = tempfile.mkstemp(
            prefix='data_id_upload_gunzip_',
            dir=os.path.dirname(outputFile),
            text=False)
        try:
            while True:
                try:
                    chunk = gzipped_file.read(CHUNK_SIZE)
                except (IOError, EOFError):
                    # Corrupt or truncated stream; report after the handles
                    # have been released below.
                    failed = True
                    break
                if not chunk:
                    break
                os.write(file_handle, chunk)
        finally:
            os.close(file_handle)
    finally:
        # Close the gzip reader on every path, including the error path.
        gzipped_file.close()

    if failed:
        os.remove(uncompressed)
        print('Problem decompressing gzipped data %s %s' % (download, outputFile))
        sys.exit(4)

    shutil.copy(uncompressed, outputFile)
    # Remove each leftover independently so that a failure to delete one
    # does not stop the other from being cleaned up.
    for leftover in (uncompressed, download):
        try:
            os.remove(leftover)
        except OSError:
            pass
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
80
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def construct_multi_filename(id, name, file_type):
    """Build the file name Galaxy uses to discover an extra output dataset.

    The result has the form ``primary_<id>_<name>_visible_<file_type>``,
    following the "number of output datasets cannot be determined until
    tool run" convention described at
    http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files

    Adapted from
    https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    parts = ('primary', id, name, 'visible', file_type)
    return "%s_%s_%s_%s_%s" % parts
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
89
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Serialise one dataset's metadata as a single newline-terminated JSON line.

    The JSON object carries ``type``, ``ext``, ``filename``, ``name`` and an
    empty ``metadata`` mapping.  ``dataset_id`` is stored under
    ``base_dataset_id`` when ``primary`` is true and under ``dataset_id``
    otherwise.

    Adapted from
    https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    meta_dict = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    meta_dict[id_key] = dataset_id
    return "%s\n" % json.dumps(meta_dict)
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
104
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
def main():
    """Download the files behind a TrustStore short URL into Galaxy outputs.

    Reads the Galaxy job parameter JSON file named by ``sys.argv[1]``,
    splits its ``URL`` parameter into ``short_url;username;password``,
    authenticates against TrustStore and downloads every file found at the
    paths the short URL expands to.  The first file is written to the
    primary output dataset; each later file is written into
    ``extra_files_path`` under a name from ``construct_multi_filename``.
    One JSON metadata line per file is written to the job's tool-provided
    metadata file.

    Exits via ``sys.exit`` with status 5 for a malformed URL or failed
    authentication, 3 for a malformed expanded path, 2 for an unknown or
    inaccessible store, and 4 for a missing file or unreadable store.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.loads(file_.read())
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    extra_files_path, file_name, hda_id, dataset_id = operator.itemgetter(
        'extra_files_path', 'file_name', 'hda_id', 'dataset_id')(output_data[0])

    url_params = urlparse.unquote(json_params['URL']).split(";")
    short_url = url_params[0]
    # The raw URL parameter embeds the username and password, so only its
    # first component is ever echoed in error messages.
    if len(url_params) < 3:
        print("The url we got back is malformed: " + short_url)
        sys.exit(5)
    username = url_params[1]
    password = url_params[2]
    if "/short" not in short_url:
        print("The url we got back is malformed: " + short_url)
        sys.exit(5)
    kms_url = short_url.split("/short")[0]

    # Use the Galaxy scratch area when it exists; otherwise pass None.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # The expanded path is the store id followed by one or more file paths.
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    store_id = path_texts[0]
    paths = path_texts[1:]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    first = True
    with open(metadata_path, 'wb') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                continue

            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                # Replace anything outside SAFE_CHARS with '-'.
                filename = "".join(
                    c if c in SAFE_CHARS else '-' for c in location.name)
                # NOTE(review): splitext keeps the leading dot (".txt");
                # confirm Galaxy accepts that as the dataset extension.
                extension = os.path.splitext(filename)[1]
                name = construct_multi_filename(hda_id, filename, extension)
                # NOTE(review): every entry is written with type "dataset".
                # The original assigned "new_primary_dataset" to an unused
                # local here; confirm which type later files should carry.
                data_type = "dataset"
                if first:
                    # The first file becomes the tool's primary output.
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath(
                        "/".join([extra_files_path, name]))
                metadata_file.write(
                    metadata_to_json(dataset_id, target_output_filename, name, extension, data_type))
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
196
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
197
Catherine Wise <catherine.wise@csiro.au>
parents:
diff changeset
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()