view TrustStoreGalaxyBrowse.py @ 24:f4585bf75a8a

Give up, try to update Python instead.
author Catherine Wise <catherine.wise@csiro.au>
date Thu, 14 May 2015 12:03:13 +1000
parents 10ce45af14dd
children 18fe446f2d02
line wrap: on
line source

"""TrustStore downloader for Galaxy."""
from __future__ import division, absolute_import, print_function, unicode_literals
import sys
import shutil
import gzip
import tempfile
import os
import json
import operator
import urlparse
from py_ts import TrustStoreClient, utils
from galaxy.datatypes.checkers import util

# Tell urllib3 to use pyOpenSSL because we are on old Python stdlib.
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()

# Client credentials presented to the TrustStore service.
# NOTE(review): the secret is hard-coded in source — confirm it is a
# distributable, non-sensitive client secret rather than a private one.
CLIENT_KEY = "desktop"
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
CHUNK_SIZE = 2**20 # 1 MiB per read when streaming gunzipped data
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' # allowed in output filenames; others become '-'

def print_nice(elem, f, depth):
    """Write *elem*'s name to *f*, indented by *depth* tab characters.

    Entries that carry a ``fragments`` attribute are treated as files and
    printed with their part count; entries without one are treated as
    directories, printed bare, and their ``children`` rendered one level
    deeper.
    """
    indent = '\t' * depth
    try:
        part_count = len(elem.fragments)
    except AttributeError:
        # No fragments attribute: directory node — recurse into children.
        f.write(indent + elem.name + "\n")
        for child in elem.children:
            print_nice(child, f, depth + 1)
    else:
        f.write(indent + elem.name + " (" + str(part_count) + " parts)\n")

def check_gzip(file_path):
    """Check whether *file_path* starts with the gzip magic bytes.

    Returns a ``(is_gzipped, is_valid)`` pair following Galaxy's checker
    convention; this implementation only ever yields ``(True, True)`` or
    ``(False, False)``.
    """
    try:
        # Bug fix: the file was opened in universal-newline text mode ("U"),
        # which can translate bytes and leak the handle on error. A magic
        # number check must read raw binary data.
        with open(file_path, 'rb') as handle:
            magic_check = handle.read(2)
        if magic_check != util.gzip_magic:
            return (False, False)
    except Exception:
        # Unreadable file: treat the same as "not gzipped".
        return (False, False)
    return (True, True)

def ungzip(download, outputFile):
    """Copy *download* to *outputFile*, decompressing it if it is gzipped.

    Plain files are copied verbatim. Gzipped files are streamed through a
    temp file in *outputFile*'s directory, then moved into place and the
    original *download* removed (best effort). Exits the process with
    status 4 on an invalid or undecompressable gzip stream, matching the
    tool's other fatal-error paths.
    """
    is_gzipped, is_valid = check_gzip(download)

    if is_gzipped and not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)
    elif is_gzipped and is_valid:
        # We need to uncompress the temp_name file, but BAM files must
        # remain compressed in the BGZF format
        file_handle, uncompressed = tempfile.mkstemp(
            prefix='data_id_upload_gunzip_',
            dir=os.path.dirname(outputFile), text=False)
        gzipped_file = gzip.GzipFile(download, 'rb')
        try:
            while True:
                try:
                    chunk = gzipped_file.read(CHUNK_SIZE)
                except IOError:
                    os.close(file_handle)
                    os.remove(uncompressed)
                    print('Problem decompressing gzipped data %s %s' % (download, outputFile))
                    sys.exit(4)
                if not chunk:
                    break
                os.write(file_handle, chunk)
        finally:
            # Bug fix: the original leaked the gzip handle when read()
            # raised IOError; close it on every path (sys.exit raises
            # SystemExit, so this runs before the process terminates).
            gzipped_file.close()
        os.close(file_handle)

        shutil.copy(uncompressed, outputFile)
        # Best-effort cleanup of the temp file and the downloaded original.
        try:
            os.remove(uncompressed)
            os.remove(download)
        except OSError:
            pass
    else:
        shutil.copy(download, outputFile)

def construct_multi_filename(id, name, file_type):
    """Build a Galaxy "primary dataset" filename from its components.

    Implementation of *Number of Output datasets cannot be determined until
    tool run* from documentation_.
    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    parts = ('primary', id, name, 'visible', file_type)
    return "_".join('%s' % part for part in parts)

def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Return one newline-terminated JSON record describing a dataset.

    When *primary* is true the record carries ``base_dataset_id``;
    otherwise it carries ``dataset_id``.
    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    meta_dict = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    meta_dict[id_key] = dataset_id
    return json.dumps(meta_dict) + "\n"

def main():
    """Download files from a TrustStore short URL into Galaxy datasets.

    Reads the Galaxy-supplied JSON properties file named by ``sys.argv[1]``,
    authenticates against the TrustStore, resolves the short URL into store
    paths, then writes each file (gunzipped if necessary) plus one line of
    dataset metadata JSON for Galaxy to collect.

    Exit codes: 2 store not found, 3 malformed path, 4 download/decompress
    failure, 5 malformed URL or authentication failure.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        settings = file_.read()
        print(settings)
        all_params = json.loads(settings)
        json_params = all_params.get("param_dict")
        metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    extra_files_path, file_name, ext, out_data_name, hda_id, dataset_id = \
      operator.itemgetter('extra_files_path', 'file_name', 'ext', 'out_data_name', 'hda_id', 'dataset_id')(output_data[0])

    # URL payload format: <short_url>;<username>;<password>
    url_params = urlparse.unquote(json_params['URL']).split(";")
    if len(url_params) < 3:
        print("The url we got back is malformed: "+ json_params['URL'])
        sys.exit(5)
    short_url = url_params[0]
    username = url_params[1]
    password = url_params[2]
    if "/short" not in short_url:
        print("The url we got back is malformed: " + json_params['URL'])
        sys.exit(5)
    kms_url = short_url.split("/short")[0]

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir='/mnt/galaxy/tmp')
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # lengthenPath yields the store id followed by one or more file paths.
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed.")
        sys.exit(3)
    paths = path_texts[1:]
    store_id = path_texts[0]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Couldn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    first = True

    # Text mode: metadata_to_json returns text, not bytes.
    with open(metadata_path, 'w') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                # Bug fix: this check used to live inside the loop over
                # ``locations``, so an empty result was silently skipped
                # and "Path not found" could never be printed.
                print("Path not found: " + path)
                continue
            for location in locations:
                # Replace any character outside the whitelist with '-'.
                filename = "".join(c if c in SAFE_CHARS else '-' for c in location.name)
                extension = os.path.splitext(filename)[1]
                name = construct_multi_filename(hda_id, filename, extension)
                if first:
                    # The first file goes to the dataset's own path; the
                    # rest become extra files alongside it.
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath("/".join([extra_files_path, name]))
                # Bug fix: target_output_filename was previously passed as
                # the 5th positional arg and landed in ds_type; Galaxy
                # expects it as the metadata record's filename.
                metadata_file.write(
                    metadata_to_json(dataset_id, target_output_filename, name, extension))
                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)


if __name__ == '__main__':
    main()