comparison TrustStoreGalaxyBrowse.py @ 0:07fda3da679b

init
author Catherine Wise <catherine.wise@csiro.au>
date Thu, 21 May 2015 08:34:17 +1000
parents
children 3ff3e9b8794f
comparison
equal deleted inserted replaced
-1:000000000000 0:07fda3da679b
1 """TrustStore downloader for Galaxy."""
2 from __future__ import division, absolute_import, print_function, unicode_literals
3 import sys
4 import shutil
5 import gzip
6 import tempfile
7 import os
8 import json
9 import operator
10 import urlparse
11 from py_ts import TrustStoreClient, utils
12 from galaxy.datatypes.checkers import util
13
# Point requests at the system CA bundle for TLS verification.
# On an old Python stdlib, urllib3 can be told to use pyOpenSSL instead:
#     import urllib3.contrib.pyopenssl
#     urllib3.contrib.pyopenssl.inject_into_urllib3()
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client key/secret handed to TrustStoreClient.Config in main(). They can be
# overridden through the environment so the credentials can be rotated without
# editing this file; the built-in values remain the defaults.
CLIENT_KEY = os.environ.get("TRUSTSTORE_CLIENT_KEY", "desktop")
CLIENT_SECRET = os.environ.get(
    "TRUSTSTORE_CLIENT_SECRET",
    "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg==")
# Number of bytes read per step while decompressing a download (1 MiB).
CHUNK_SIZE = 2**20
# Characters kept verbatim in output file names; main() replaces any other
# character with '-'.
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
24
def print_nice(elem, f, depth):
    """Write the name of *elem* and of all its descendants to *f*.

    Each entry goes on its own line, indented with *depth* tab characters;
    children are written one level deeper. Entries that expose a
    ``fragments`` attribute also get their fragment count appended.
    """
    indent = '\t' * depth
    try:
        line = indent + elem.name + " (" + str(len(elem.fragments)) + " parts)\n"
    except AttributeError:
        # No fragments attribute on this entry: print the bare name.
        line = indent + elem.name + "\n"
    f.write(line)

    next_depth = depth + 1
    for node in elem.children:
        print_nice(node, f, next_depth)
33
def check_gzip(file_path):
    """Report whether the file at *file_path* starts with the gzip magic bytes.

    Returns an ``(is_gzipped, is_valid)`` tuple. No separate validity check is
    performed, so the two flags are always equal: ``(True, True)`` when the
    first two bytes equal ``util.gzip_magic``, and ``(False, False)`` when
    they differ or the file cannot be opened or read.
    """
    try:
        # Read the magic number as raw bytes (no newline translation); the
        # ``with`` block closes the handle even if read() fails.
        with open(file_path, "rb") as handle:
            magic_check = handle.read(2)
    except (IOError, OSError):
        return (False, False)
    if magic_check != util.gzip_magic:
        return (False, False)
    return (True, True)
45
def _remove_quietly(path):
    """Delete *path*, ignoring OS-level failures (best-effort cleanup)."""
    try:
        os.remove(path)
    except OSError:
        pass


def ungzip(download, outputFile):
    """Place the content of *download* at *outputFile*, gunzipping if needed.

    When ``check_gzip`` reports a gzip file, the data is decompressed in
    ``CHUNK_SIZE`` pieces into a temporary file created next to *outputFile*,
    copied over *outputFile*, and then both the temporary file and *download*
    are removed. Otherwise *download* is copied to *outputFile* unchanged and
    is left in place.

    Exits the process with status 4 when the file is flagged as an invalid
    gzip or when decompression fails.
    """
    # Local import: corrupt deflate data surfaces as zlib.error, which the
    # module-level imports do not provide.
    import zlib

    is_gzipped, is_valid = check_gzip(download)

    if is_gzipped and not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)

    if not is_gzipped:
        shutil.copy(download, outputFile)
        return

    file_handle, uncompressed = tempfile.mkstemp(
        prefix='data_id_upload_gunzip_',
        dir=os.path.dirname(outputFile),
        text=False)
    try:
        try:
            gzipped_file = gzip.GzipFile(download, 'rb')
            try:
                while True:
                    chunk = gzipped_file.read(CHUNK_SIZE)
                    if not chunk:
                        break
                    os.write(file_handle, chunk)
            finally:
                gzipped_file.close()
        finally:
            # Always release the descriptor, on success and on failure.
            os.close(file_handle)
    except (IOError, EOFError, zlib.error):
        # Invalid, truncated or corrupt gzip data: drop the partial output.
        _remove_quietly(uncompressed)
        print('Problem decompressing gzipped data %s %s' % (download, outputFile))
        sys.exit(4)

    try:
        shutil.copy(uncompressed, outputFile)
    finally:
        _remove_quietly(uncompressed)
    # Removed independently of the temp file so that one failed removal does
    # not leave the other file behind.
    _remove_quietly(download)
80
def construct_multi_filename(id, name, file_type):
    """Build the file name used for an additional output dataset.

    The result has the form ``primary_<id>_<name>_visible_<file_type>``,
    following the *Number of Output datasets cannot be determined until
    tool run* scheme from the Galaxy documentation_.

    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    return "primary_%s_%s_visible_%s" % (id, name, file_type)
89
def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Serialise one dataset's metadata as a single newline-terminated JSON line.

    The JSON object carries ``type``, ``ext``, ``filename``, ``name`` and an
    empty ``metadata`` mapping. *dataset_id* is stored under
    ``base_dataset_id`` when *primary* is true and under ``dataset_id``
    otherwise.

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    record = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    record[id_key] = dataset_id
    return json.dumps(record) + "\n"
104
def main():
    """Download the TrustStore files named by a Galaxy job into its outputs.

    ``sys.argv[1]`` is the path of a JSON file holding the job parameters.
    Its ``param_dict['URL']`` is unquoted and split on ``;`` into a short URL,
    a user name and a password. After authenticating, the short URL is
    expanded into a store id followed by one or more paths; every file found
    at those paths is fetched, gunzipped if necessary and written out. The
    first file goes to the primary output's ``file_name``; later files go
    under ``extra_files_path``. One metadata line per file is written to the
    job's ``TOOL_PROVIDED_JOB_METADATA_FILE``.

    Exit codes: 5 for a missing/malformed URL or failed authentication, 3 for
    a malformed expanded path, 2 when the store cannot be found, 4 when the
    store cannot be listed or a file cannot be fetched or decompressed.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.loads(file_.read())
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    if not json_params or 'URL' not in json_params:
        # Fail with a clear message instead of a traceback.
        print("The job parameters do not contain a URL.")
        sys.exit(5)

    output_data = all_params.get('output_data')
    extra_files_path, file_name, hda_id, dataset_id = operator.itemgetter(
        'extra_files_path', 'file_name', 'hda_id', 'dataset_id')(output_data[0])

    url_params = urlparse.unquote(json_params['URL']).split(";")
    short_url = url_params[0]
    if len(url_params) < 3 or "/short" not in short_url:
        # Echo only the short-URL part: the remaining fields carry the user
        # name and password and must not end up in the job output.
        print("The url we got back is malformed: " + short_url)
        sys.exit(5)
    username = url_params[1]
    password = url_params[2]
    kms_url = short_url.split("/short")[0]

    # Use the Galaxy scratch area for temporary files when it exists.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    store_id = path_texts[0]
    paths = path_texts[1:]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    first = True
    # The metadata lines are text, so the file is opened in text mode.
    with open(metadata_path, 'w') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                continue

            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                filename = "".join(
                    c if c in SAFE_CHARS else '-' for c in location.name)
                # os.path.splitext keeps the leading dot on the extension.
                extension = os.path.splitext(filename)[1]
                name = construct_multi_filename(hda_id, filename, extension)
                if first:
                    # The first file fills the job's primary output.
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath(
                        "/".join([extra_files_path, name]))
                # NOTE(review): target_output_filename lands in
                # metadata_to_json's ds_type parameter, so the emitted "type"
                # field is a path rather than a dataset type. Left unchanged;
                # confirm the intended metadata layout before altering it.
                metadata_file.write(
                    metadata_to_json(dataset_id, filename, name, extension, target_output_filename))
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)


if __name__ == '__main__':
    main()