Mercurial > repos > cathywise > truststore_browse
annotate TrustStoreGalaxyBrowse.py @ 10:7301c2e96fce
Fix circular dependency
author | Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au> |
---|---|
date | Thu, 25 Jun 2015 08:36:38 +1000 |
parents | 3e8bd0d01725 |
children | 9ada74e8915e |
rev | line source |
---|---|
0 | 1 """TrustStore downloader for Galaxy.""" |
2 from __future__ import division, absolute_import, print_function, unicode_literals | |
3 import sys | |
4 import shutil | |
5 import gzip | |
6 import tempfile | |
7 import os | |
8 import json | |
9 import operator | |
10 import urlparse | |
11 from py_ts import TrustStoreClient, utils | |
10
7301c2e96fce
Fix circular dependency
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
9
diff
changeset
|
12 import galaxy.model # need to import model before sniff to resolve a circular import dependency |
0 | 13 from galaxy.datatypes.checkers import util |
9
3e8bd0d01725
Attempt to guess data type.
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
4
diff
changeset
|
14 from galaxy.datatypes import sniff |
3e8bd0d01725
Attempt to guess data type.
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
4
diff
changeset
|
15 from galaxy.datatypes.registry import Registry |
0 | 16 |
# Historical note: urllib3 used to be told to use pyOpenSSL here because the
# tool ran on an old Python stdlib. The injection is currently disabled:
#   import urllib3.contrib.pyopenssl
#   urllib3.contrib.pyopenssl.inject_into_urllib3()

# Set the CA bundle path for the whole process at import time.
# NOTE(review): this unconditionally overrides any value already present in
# the environment - confirm that is intended.
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client credentials handed to TrustStoreClient.Config in main().
# NOTE(review): the secret is hard-coded in source control; consider loading
# it from deployment configuration instead.
CLIENT_KEY = "desktop"
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="

# Number of bytes read per iteration while decompressing (2**20 = 1 MiB).
CHUNK_SIZE = 2**20

# Characters kept as-is when building a local file name in main(); every
# other character is replaced with '-'.
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
27 | |
def print_nice(elem, f, depth):
    """Write ``elem``'s name, then its children recursively, to ``f``.

    Each line is prefixed with ``depth`` tab characters; children are written
    one level deeper. When ``elem`` has a ``fragments`` attribute the number
    of fragments is appended as ``" (N parts)"``.

    :param elem: object exposing ``name`` and an iterable ``children``
        (and optionally ``fragments``).
    :param f: writable text stream.
    :param depth: indentation level of ``elem``.
    """
    indent = '\t' * depth
    # Only the ``fragments`` lookup is guarded, so an AttributeError raised by
    # the output stream itself is no longer silently retried.
    try:
        line = indent + elem.name + " (" + str(len(elem.fragments)) + " parts)\n"
    except AttributeError:
        line = indent + elem.name + "\n"
    f.write(line)
    for child in elem.children:
        print_nice(child, f, depth + 1)
36 | |
def check_gzip(file_path):
    """Report whether ``file_path`` starts with the gzip magic number.

    Only the first two bytes are inspected; the payload is not validated, so
    the two flags in the result are always equal.

    :param file_path: path of the file to inspect.
    :returns: ``(is_gzipped, is_valid)`` - ``(True, True)`` when the file
        begins with ``util.gzip_magic``, otherwise ``(False, False)``
        (including when the file cannot be read).
    """
    try:
        # Binary mode: the magic number is raw bytes, and the context manager
        # guarantees the handle is closed even if the read fails.
        with open(file_path, "rb") as handle:
            magic_check = handle.read(2)
    except Exception:
        # Deliberate best-effort: an unreadable file is reported as "not gzip".
        return (False, False)
    if magic_check != util.gzip_magic:
        return (False, False)
    return (True, True)
48 | |
def ungzip(download, outputFile):
    """Place ``download`` at ``outputFile``, gunzipping it first if needed.

    A file that ``check_gzip`` does not report as gzip is copied unchanged and
    the download is left in place. A gzip file is decompressed into a
    temporary file created next to ``outputFile``, copied over ``outputFile``,
    and then both the temporary file and ``download`` are removed.

    :param download: path of the downloaded file.
    :param outputFile: path the (uncompressed) data is written to.

    Exits the process with status 4 when the data cannot be decompressed.
    """
    # Local import: only this function needs zlib's exception type.
    import zlib

    is_gzipped, is_valid = check_gzip(download)

    if not is_gzipped:
        shutil.copy(download, outputFile)
        return
    if not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)

    # NOTE(review): the original comment here said BAM files must remain
    # compressed in BGZF format, but nothing in this function special-cases
    # them - every file with a gzip magic number is decompressed.
    file_handle, uncompressed = tempfile.mkstemp(
        prefix='data_id_upload_gunzip_', dir=os.path.dirname(outputFile), text=False)
    failed = False
    # Both handles are closed on every path, including the failure path.
    with os.fdopen(file_handle, 'wb') as sink, gzip.GzipFile(download, 'rb') as gzipped_file:
        while True:
            try:
                chunk = gzipped_file.read(CHUNK_SIZE)
            except (IOError, EOFError, zlib.error):
                # Truncated or corrupt streams surface as any of these.
                failed = True
                break
            if not chunk:
                break
            sink.write(chunk)
    if failed:
        os.remove(uncompressed)
        print('Problem decompressing gzipped data %s %s' % (download, outputFile))
        sys.exit(4)

    shutil.copy(uncompressed, outputFile)
    # Best-effort cleanup; each file is attempted independently so a failure
    # on one does not leave the other behind.
    for leftover in (uncompressed, download):
        try:
            os.remove(leftover)
        except OSError:
            pass
83 | |
def construct_multi_filename(id, name, file_type):
    """Build the file name Galaxy expects for a dataset discovered at run time.

    Implements the *Number of Output datasets cannot be determined until tool
    run* convention described at
    http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
    (adapted from
    https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py).

    :param id: identifier placed in the second field (``id`` shadows the
        builtin, but the name is kept for interface compatibility).
    :param name: designation placed in the third field.
    :param file_type: extension placed in the last field.
    :returns: ``"primary_<id>_<name>_visible_<file_type>"``.
    """
    return "%s_%s_%s_%s_%s" % ('primary', id, name, 'visible', file_type)
92 | |
def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Return one newline-terminated JSON record describing an output file.

    Adapted from
    https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py

    :param dataset_id: stored under ``base_dataset_id`` when ``primary`` is
        true, otherwise under ``dataset_id``.
    :param filename: value of the ``filename`` field.
    :param name: value of the ``name`` field.
    :param extesion: value of the ``ext`` field (the misspelt parameter name
        is kept for interface compatibility).
    :param ds_type: value of the ``type`` field.
    :param primary: selects which key carries ``dataset_id``.
    :returns: the JSON document followed by a single ``"\\n"``.
    """
    meta_dict = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    meta_dict[id_key] = dataset_id
    return "%s\n" % json.dumps(meta_dict)
107 | |
def main():
    """Download the files named by a TrustStore short URL into Galaxy outputs.

    ``sys.argv[1]`` is the path of a JSON job description. From it this reads
    the tool parameters (``param_dict``), the job configuration
    (``job_config``) and the first entry of ``output_data``. The ``URL``
    parameter is expected to unquote to ``<short_url>;<username>;<password>``.

    The first downloaded file is written to the primary output's
    ``file_name``; every further file is written under ``__new_file_path__``
    using the name from ``construct_multi_filename``. One JSON record per file
    is written to the tool-provided job metadata file.

    Exit codes: 5 for a malformed URL or failed authentication, 3 for a
    malformed lengthened path, 2 when the store cannot be found, 4 when the
    store listing or a file is unavailable.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.loads(file_.read())
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    # Only the fields actually used below are extracted from the first output.
    file_name, ext, hda_id, dataset_id = operator.itemgetter(
        'file_name', 'ext', 'hda_id', 'dataset_id')(output_data[0])
    # Additional files go to the job's new-file directory, not to the
    # 'extra_files_path' recorded on the output.
    extra_files_path = json_params['__new_file_path__']

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=all_params['job_config']['GALAXY_ROOT_DIR'],
        config=all_params['job_config']['GALAXY_DATATYPES_CONF_FILE']
    )

    url_params = urlparse.unquote(json_params['URL']).split(";")
    if len(url_params) < 3:
        print("The url we got back is malformed: " + json_params['URL'])
        sys.exit(5)
    short_url = url_params[0]
    username = url_params[1]
    password = url_params[2]
    if "/short" not in short_url:
        print("The url we got back is malformed: " + json_params['URL'])
        sys.exit(5)
    # Everything before "/short" is used as the service URL.
    kms_url = short_url.split("/short")[0]

    # Use the Galaxy scratch directory when it exists; otherwise pass None.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # The lengthened path is the store id followed by one or more paths.
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    store_id = path_texts[0]
    paths = path_texts[1:]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    first = True
    with open(metadata_path, 'wb') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                continue
            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                # Replace every character outside SAFE_CHARS with '-'.
                filename = "".join(c if c in SAFE_CHARS else '-' for c in location.name)
                extension = os.path.splitext(filename)[1].strip(".")
                name = construct_multi_filename(hda_id, filename, extension)
                data_type = "new_primary_dataset"
                if first:
                    # The first file fills the tool's declared output.
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath(
                        os.path.join(extra_files_path, name))
                # NOTE(review): the original indentation was lost, so it is
                # assumed this runs for every file, not only in the else
                # branch - confirm. It is also handed the sanitized name, not
                # a path to the data, and runs before the download; verify
                # whether it should sniff target_output_filename after
                # ungzip() instead.
                ext = sniff.handle_uploaded_dataset_file(filename, datatypes_registry, ext=ext)
                metadata_file.write(
                    metadata_to_json(dataset_id, target_output_filename, name, ext, data_type))
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)
207 | |
208 | |
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()