diff TrustStoreGalaxyBrowse.py @ 0:07fda3da679b

init
author Catherine Wise <catherine.wise@csiro.au>
date Thu, 21 May 2015 08:34:17 +1000
parents
children 3ff3e9b8794f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/TrustStoreGalaxyBrowse.py	Thu May 21 08:34:17 2015 +1000
@@ -0,0 +1,197 @@
+"""TrustStore downloaded for Galaxy."""
+from __future__ import division, absolute_import, print_function, unicode_literals
+import sys
+import shutil
+import gzip
+import tempfile
+import os
+import json
+import operator
+import urlparse
+from py_ts import TrustStoreClient, utils
+from galaxy.datatypes.checkers import util
+
# Tell urllib3 to use pyOpenSSL because we are on old Python stdlib.
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
#
# Point the `requests` library at the system CA bundle (Debian/Ubuntu path)
# so TLS verification works against the OS certificate store.
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client credentials for the TrustStore "desktop" client.
# NOTE(review): a secret committed to source control should normally be
# rotated and loaded from configuration -- confirm whether this one is
# actually sensitive or a public app identifier.
CLIENT_KEY = "desktop"
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
CHUNK_SIZE = 2**20 # 1Mb read size used when streaming decompression
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '  # filename whitelist
+
def print_nice(elem, f, depth):
    """Write an indented tree listing of *elem* to the file-like *f*.

    A leaf (anything exposing a ``fragments`` attribute) is printed with
    its part count; any other node is printed by name and its ``children``
    are listed recursively one tab level deeper.
    """
    indent = '\t' * depth
    if hasattr(elem, 'fragments'):
        f.write("%s%s (%s parts)\n" % (indent, elem.name, len(elem.fragments)))
    else:
        f.write("%s%s\n" % (indent, elem.name))
        for child in elem.children:
            print_nice(child, f, depth + 1)
+
def check_gzip(file_path):
    """Check whether *file_path* starts with the gzip magic bytes.

    Returns an ``(is_gzipped, is_valid)`` tuple.  As in the original
    implementation the two flags always agree: ``(True, True)`` when the
    file begins with the gzip magic, ``(False, False)`` otherwise
    (including when the file cannot be opened or read).
    """
    try:
        # BUG FIX: read in binary mode.  The old "U" (universal newline)
        # text mode can translate byte sequences before the magic-number
        # comparison, and left the handle to be closed by the GC.
        with open(file_path, 'rb') as handle:
            magic_check = handle.read(2)
    except (IOError, OSError):
        # Unreadable or missing file: report "not gzip" rather than crash,
        # matching the original best-effort behaviour.
        return (False, False)
    if magic_check != util.gzip_magic:
        return (False, False)
    return (True, True)
+
def ungzip(download, outputFile):
    """Copy *download* to *outputFile*, decompressing it first if gzipped.

    Plain files are copied verbatim.  Gzipped files are streamed through a
    sibling temp file so a half-written result never lands at *outputFile*.
    Exits the process with status 4 on an invalid or undecompressible
    gzip stream, mirroring the tool's other fatal paths.
    """
    is_gzipped, is_valid = check_gzip(download)

    if is_gzipped and not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)
    elif is_gzipped and is_valid:
        # We need to uncompress the temp_name file, but BAM files must
        # remain compressed in the BGZF format
        file_handle, uncompressed = tempfile.mkstemp(
            prefix='data_id_upload_gunzip_',
            dir=os.path.dirname(outputFile), text=False)
        gzipped_file = gzip.GzipFile(download, 'rb')
        try:
            while True:
                try:
                    chunk = gzipped_file.read(CHUNK_SIZE)
                except IOError:
                    os.close(file_handle)
                    os.remove(uncompressed)
                    print('Problem decompressing gzipped data %s %s' % (download, outputFile))
                    sys.exit(4)
                if not chunk:
                    break
                os.write(file_handle, chunk)
            os.close(file_handle)
        finally:
            # BUG FIX: the original leaked the GzipFile handle on the
            # error path; close it however we leave the loop (including
            # the sys.exit above, which raises SystemExit through here).
            gzipped_file.close()

        shutil.copy(uncompressed, outputFile)
        try:
            # Best-effort cleanup of the temporaries; ignore races/permissions.
            os.remove(uncompressed)
            os.remove(download)
        except OSError:
            pass
    else:
        shutil.copy(download, outputFile)
+
+def construct_multi_filename(id, name, file_type):
+    """ Implementation of *Number of Output datasets cannot be determined until
+    tool run* from documentation_.
+    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
+    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
+    """
+    filename = "%s_%s_%s_%s_%s" % ('primary', id, name, 'visible', file_type)
+    return filename
+
def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Serialise one dataset description as a newline-terminated JSON record.

    When *primary* is true the dataset id is emitted under
    ``base_dataset_id``, otherwise under ``dataset_id``.
    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    (NB: the ``extesion`` spelling is kept for keyword-caller compatibility.)
    """
    record = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    record[id_key] = dataset_id
    return json.dumps(record) + "\n"
+
def main():
    """Entry point: download TrustStore files into Galaxy datasets.

    ``sys.argv[1]`` is a Galaxy "properties" JSON file carrying the tool
    parameters (including the composite ``URL`` value) and the path of the
    tool-provided metadata file.  Exit codes: 2 store not found, 3 bad
    lengthened path, 4 download/store failure, 5 bad URL or auth failure.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.loads(file_.read())
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    extra_files_path, file_name, ext, out_data_name, hda_id, dataset_id = \
        operator.itemgetter('extra_files_path', 'file_name', 'ext',
                            'out_data_name', 'hda_id', 'dataset_id')(output_data[0])

    # URL is percent-encoded "<short-url>;<username>;<password>".
    url_params = urlparse.unquote(json_params['URL']).split(";")
    if len(url_params) < 3:
        print("The url we got back is malformed: " + json_params['URL'])
        sys.exit(5)
    short_url = url_params[0]
    username = url_params[1]
    password = url_params[2]
    if "/short" not in short_url:
        print("The url we got back is malformed: " + json_params['URL'])
        sys.exit(5)
    # Everything before "/short" is the KMS base URL.
    kms_url = short_url.split("/short")[0]

    # Prefer Galaxy's large scratch volume when it exists.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # lengthenPath yields [store_id, path, path, ...].
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    paths = path_texts[1:]
    store_id = path_texts[0]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    # Guard clause instead of the original nested if/else (same messages,
    # same exit code, same stdout ordering).
    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    first = True
    with open(metadata_path, 'wb') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                # BUG FIX: the original fell through to the `for` loop
                # below, raising TypeError whenever `locations` was None.
                continue
            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                # Replace anything outside the whitelist with '-'.
                filename = "".join(c if c in SAFE_CHARS else '-'
                                   for c in location.name)
                extension = os.path.splitext(filename)[1]
                name = construct_multi_filename(hda_id, filename, extension)
                # First file goes to the main dataset path; the rest become
                # extra files alongside it.
                if first:
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath(
                        "/".join([extra_files_path, name]))
                # NOTE(review): the fifth positional argument lands in
                # metadata_to_json's ds_type parameter, so the JSON "type"
                # field becomes a file path; it looks like it was meant to
                # be the output filename instead -- confirm before changing.
                metadata_file.write(
                    metadata_to_json(dataset_id, filename, name, extension,
                                     target_output_filename))
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)


if __name__ == '__main__':
    main()