Mercurial > repos > cathywise > truststore_browse
annotate TrustStoreGalaxyBrowse.py @ 23:194cf7e7e936 default tip
Format hacks yo.
| author | Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au> |
|---|---|
| date | Thu, 25 Jun 2015 14:06:10 +1000 |
| parents | fb915c3aefb2 |
| children |
| rev | line source |
|---|---|
| 0 | 1 """TrustStore downloaded for Galaxy.""" |
| 2 from __future__ import division, absolute_import, print_function, unicode_literals | |
| 3 import sys | |
| 4 import shutil | |
| 5 import gzip | |
| 6 import tempfile | |
| 7 import os | |
| 8 import json | |
| 9 import operator | |
| 10 import urlparse | |
| 11 from py_ts import TrustStoreClient, utils | |
|
14
c7287129f37f
And again.
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
13
diff
changeset
|
12 # import galaxy.model # need to import model before sniff to resolve a circular import dependency |
| 0 | 13 from galaxy.datatypes.checkers import util |
|
14
c7287129f37f
And again.
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
13
diff
changeset
|
14 # from galaxy.datatypes import sniff |
|
c7287129f37f
And again.
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
13
diff
changeset
|
15 # from galaxy.datatypes.registry import Registry |
| 0 | 16 |
# Tell urllib3 to use pyOpenSSL because we are on old Python stdlib.
# (Currently disabled: both statements below are commented out.)
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
#
# Unconditionally overrides any REQUESTS_CA_BUNDLE value already present in
# the environment with this fixed CA bundle path.
# NOTE(review): presumably consumed by the HTTP layer used by py_ts -- confirm.
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client credentials handed to TrustStoreClient.Config() in main().
# NOTE(review): the secret is committed in source; consider loading it from
# configuration or the environment instead.
CLIENT_KEY = "desktop"
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
# Number of bytes read per iteration while decompressing in ungzip().
CHUNK_SIZE = 2**20  # 1 MiB (1,048,576 bytes)
# Characters kept verbatim when main() sanitises a remote file name; every
# other character is replaced with '-'.
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
| 27 | |
def print_nice(elem, f, depth):
    """Write an indented listing of ``elem`` and its descendants to ``f``.

    Each node is written on its own line, prefixed by ``depth`` tab
    characters.  Nodes that expose a ``fragments`` attribute get a
    ``" (N parts)"`` suffix, where N is ``len(elem.fragments)``.

    Args:
        elem: Node with a ``name`` attribute and, optionally, ``fragments``
            and ``children`` attributes.
        f: Object with a ``write(text)`` method.
        depth: Indentation level of ``elem`` (number of leading tabs).
    """
    indent = '\t' * depth
    # Probe the optional attribute explicitly rather than wrapping the whole
    # write in ``except AttributeError``, which would also hide unrelated
    # attribute errors (e.g. a bad ``f`` or a node without ``name``).
    fragments = getattr(elem, 'fragments', None)
    if fragments is None:
        f.write(indent + elem.name + "\n")
    else:
        f.write(indent + elem.name + " (" + str(len(fragments)) + " parts)\n")
    # Nodes without children (or with ``children`` set to None) simply end
    # the recursion instead of raising.
    for child in getattr(elem, 'children', None) or ():
        print_nice(child, f, depth + 1)
| 36 | |
def check_gzip(file_path):
    """Report whether the file at ``file_path`` starts with the gzip magic.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        An ``(is_gzipped, is_valid)`` tuple.  Both flags are True when the
        first two bytes are the gzip magic number, and both are False
        otherwise, including when the file cannot be opened or read.
    """
    gzip_magic = b"\x1f\x8b"  # ID1, ID2 of the gzip member header (RFC 1952)
    try:
        # Binary mode: the magic number is raw bytes, and the legacy "U"
        # text mode is rejected by newer Python versions.
        with open(file_path, "rb") as handle:
            magic_check = handle.read(len(gzip_magic))
    except (IOError, OSError, TypeError, ValueError):
        # Unreadable or unusable path: treat as "not gzip" (best effort).
        return (False, False)
    if magic_check != gzip_magic:
        return (False, False)
    return (True, True)
| 48 | |
def ungzip(download, outputFile):
    """Place ``download`` at ``outputFile``, gunzipping it first if needed.

    If ``check_gzip`` reports the download as gzip data, it is decompressed
    into a temporary file created next to ``outputFile``, the result is
    copied to ``outputFile``, and both the temporary file and the downloaded
    archive are removed.  Otherwise the download is copied as-is.

    Args:
        download: Path of the downloaded file.
        outputFile: Path the (uncompressed) data must end up at.

    Exits the process with status 4 when the data is flagged as gzip but
    invalid, or when decompression fails.
    """
    is_gzipped, is_valid = check_gzip(download)

    if not is_gzipped:
        try:
            shutil.copy(download, outputFile)
        except shutil.Error:
            pass  # If the file is already in the right location, move along.
        return

    if not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)

    # NOTE(review): an earlier comment here said BAM files must stay
    # compressed (BGZF), but nothing in this function exempts them -- confirm
    # whether BAM downloads need special handling.
    file_handle, uncompressed = tempfile.mkstemp(
        prefix='data_id_upload_gunzip_',
        dir=os.path.dirname(outputFile),
        text=False)
    try:
        # fdopen takes ownership of the descriptor, so leaving the ``with``
        # closes it on both the success and the failure path.
        with os.fdopen(file_handle, 'wb') as unpacked:
            gzipped_file = gzip.GzipFile(download, 'rb')
            try:
                shutil.copyfileobj(gzipped_file, unpacked, CHUNK_SIZE)
            finally:
                gzipped_file.close()
    except (IOError, EOFError):
        # EOFError covers truncated archives on Python 3.
        os.remove(uncompressed)
        print('Problem decompressing gzipped data %s %s' % (download, outputFile))
        sys.exit(4)

    # When the archive was downloaded straight to the output path, the copy
    # below overwrites it with the decompressed data; deleting ``download``
    # afterwards would delete the result, so only the temp file is removed.
    same_location = os.path.realpath(download) == os.path.realpath(outputFile)

    try:
        shutil.copy(uncompressed, outputFile)
    except shutil.Error:
        pass  # If the file is already in the right location, move along.

    leftovers = [uncompressed] if same_location else [uncompressed, download]
    for leftover in leftovers:
        # Best-effort cleanup; one failed removal must not skip the other.
        try:
            os.remove(leftover)
        except OSError:
            pass
| 0 | 89 |
def construct_multi_filename(id, name, file_type):
    """Build the file name for an extra ("primary") output dataset.

    Implementation of *Number of Output datasets cannot be determined until
    tool run* from documentation_.

    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py

    Returns the five fields ``primary``, ``id``, ``name``, ``visible`` and
    ``file_type`` joined with underscores.
    """
    pieces = ['primary', id, name, 'visible', file_type]
    # Format each field on its own so every value is rendered with "%s".
    return "_".join("%s" % (piece,) for piece in pieces)
| 98 | |
|
22
fb915c3aefb2
.fa => fasta
Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
parents:
21
diff
changeset
|
def metadata_to_json(dataset_id, filename, name, extension, ds_type='dataset', primary=False):
    """Return one newline-terminated JSON record describing an output file.

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py

    The extension ``fa`` is reported as ``fasta``; any other extension is
    passed through unchanged.  ``dataset_id`` is stored under the key
    ``base_dataset_id`` when ``primary`` is true and under ``dataset_id``
    otherwise.
    """
    resolved_ext = 'fasta' if extension == 'fa' else extension
    record = {
        'type': ds_type,
        'ext': resolved_ext,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    record[id_key] = dataset_id
    return json.dumps(record) + "\n"
| 116 | |
def main():
    """Download the TrustStore files named by the tool parameters.

    Reads the JSON parameter file given as ``sys.argv[1]``, authenticates
    against TrustStore with the credentials embedded in the ``URL``
    parameter (``<short url>;<username>;<password>``), resolves the short
    URL to a store and a list of paths, and downloads every file found at
    those paths.  The first downloaded file is written to the job's output
    file; each later file is written under ``__new_file_path__``.  One JSON
    metadata line per file is written to the tool-provided metadata file.

    Exit statuses used: 2 (store not found), 3 (malformed path),
    4 (store unreadable or file missing), 5 (malformed URL or failed
    authentication).
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.loads(file_.read())
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    file_name, hda_id, dataset_id = operator.itemgetter(
        'file_name', 'hda_id', 'dataset_id')(output_data[0])
    # Extra outputs go to the job's new-file directory, not to the
    # extra_files_path reported for the first output dataset.
    extra_files_path = json_params['__new_file_path__']

    # The raw URL parameter carries the username and password, so it must
    # never be echoed in error messages.
    url_params = urlparse.unquote(json_params['URL']).split(";")
    if len(url_params) < 3:
        print("The url we got back is malformed: "
              "expected '<short url>;<username>;<password>'.")
        sys.exit(5)
    short_url, username, password = url_params[:3]
    if "/short" not in short_url:
        print("The url we got back is malformed: " + short_url)
        sys.exit(5)
    kms_url = short_url.split("/short")[0]

    # Use the Galaxy scratch area when it exists; otherwise pass None.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # The lengthened path is the store ID followed by one or more paths.
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    store_id = path_texts[0]
    paths = path_texts[1:]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    # Empty parameter file.
    open(file_name, 'w').close()

    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    first = True
    # Text mode: metadata_to_json() returns text, not bytes.
    with open(metadata_path, 'w') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                continue
            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                # Replace every character outside SAFE_CHARS with '-'.
                filename = "".join(
                    c if c in SAFE_CHARS else '-' for c in location.name)
                extension = os.path.splitext(filename)[1].strip(".")
                name = construct_multi_filename(hda_id, filename, extension)
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                if first:
                    # The first file becomes the job's declared output.
                    first = False
                    primary = False
                    data_type = "dataset"
                    target_output_filename = file_name
                else:
                    # Every later file is reported as an additional dataset.
                    primary = True
                    data_type = "new_primary_dataset"
                    target_output_filename = os.path.normpath(
                        os.path.join(extra_files_path, name))
                ungzip(download, target_output_filename)
                metadata_file.write(
                    metadata_to_json(dataset_id, target_output_filename, name,
                                     extension, data_type, primary=primary))
| 220 | |
| 221 | |
# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
