view TrustStoreGalaxyBrowse.py @ 9:3e8bd0d01725

Attempt to guess data type.
author Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au>
date Thu, 25 Jun 2015 08:26:34 +1000
parents 2ca750b9083c
children 7301c2e96fce
line wrap: on
line source

"""TrustStore downloaded for Galaxy."""
from __future__ import division, absolute_import, print_function, unicode_literals
import sys
import shutil
import gzip
import tempfile
import os
import json
import operator
import urlparse
from py_ts import TrustStoreClient, utils
from galaxy.datatypes.checkers import util
from galaxy.datatypes import sniff
from galaxy.datatypes.registry import Registry

# Disabled: tell urllib3 to use pyOpenSSL because we are on an old Python stdlib.
# import urllib3.contrib.pyopenssl
# urllib3.contrib.pyopenssl.inject_into_urllib3()
#
# Point certificate verification at the system CA bundle.
# NOTE(review): presumably read by the requests library underneath py_ts - confirm.
os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt"

# Client identity handed to TrustStoreClient.Config in main().
CLIENT_KEY = "desktop"
# NOTE(review): this secret is committed in source; consider loading it from
# configuration or the environment instead.
CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
CHUNK_SIZE = 2**20 # 1 MiB; read size used by ungzip() while decompressing.
# Characters kept in output file names; main() replaces anything else with '-'.
SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '

def print_nice(elem, f, depth):
    """Write an indented listing of ``elem`` to ``f``.

    The element is first written as a single line carrying its name and the
    number of entries in ``elem.fragments``.  If that attempt raises
    ``AttributeError`` the element is written by name only and every entry of
    ``elem.children`` is listed one tab level deeper.
    """
    indent = '\t' * depth
    try:
        label = indent + elem.name
        f.write(label + " (" + str(len(elem.fragments)) + " parts)\n")
    except AttributeError:
        f.write(indent + elem.name + "\n")
        for node in elem.children:
            print_nice(node, f, depth + 1)

def check_gzip(file_path):
    """Report whether ``file_path`` starts with the gzip magic number.

    Returns an ``(is_gzipped, is_valid)`` tuple.  Both flags are ``True`` when
    the first two bytes equal ``util.gzip_magic``; both are ``False``
    otherwise, including when the file cannot be opened or read.  The gzip
    stream itself is not validated here.
    """
    try:
        # Binary mode because the magic number is raw bytes; the context
        # manager closes the handle even when the read fails.
        with open(file_path, "rb") as handle:
            magic_check = handle.read(2)
    except (IOError, OSError):
        return (False, False)
    if magic_check != util.gzip_magic:
        return (False, False)
    return (True, True)

def ungzip(download, outputFile):
    """Place the downloaded file at ``outputFile``, gunzipping it if needed.

    ``check_gzip`` decides whether ``download`` is gzip-compressed.  Plain
    files are copied to ``outputFile`` unchanged.  Compressed files are
    decompressed into a temporary file created in the directory of
    ``outputFile``, copied over ``outputFile``, and then the temporary file
    and ``download`` are removed on a best-effort basis.

    Exits the process with status 4 when the data is flagged as invalid gzip
    or when decompression fails.
    """
    is_gzipped, is_valid = check_gzip(download)

    if not is_gzipped:
        shutil.copy(download, outputFile)
        return
    if not is_valid:
        print("File is compressed (gzip) but not valid.")
        sys.exit(4)

    file_handle, uncompressed = tempfile.mkstemp(
        prefix='data_id_upload_gunzip_',
        dir=os.path.dirname(outputFile),
        text=False)
    try:
        # The context managers close both handles on every path, including
        # when reading the compressed stream fails part-way through.
        with os.fdopen(file_handle, 'wb') as unpacked:
            with gzip.GzipFile(download, 'rb') as gzipped_file:
                shutil.copyfileobj(gzipped_file, unpacked, CHUNK_SIZE)
    except (IOError, EOFError):
        # EOFError is what newer gzip modules raise for a truncated stream.
        os.remove(uncompressed)
        print('Problem decompressing gzipped data %s %s' % (download, outputFile))
        sys.exit(4)

    shutil.copy(uncompressed, outputFile)
    # Clean up independently so one failed removal does not skip the other.
    for leftover in (uncompressed, download):
        try:
            os.remove(leftover)
        except OSError:
            pass

def construct_multi_filename(id, name, file_type):
    """Build the file name Galaxy uses to discover an extra output dataset.

    The result has the form ``primary_<id>_<name>_visible_<file_type>``, as
    described under *Number of Output datasets cannot be determined until
    tool run* in the documentation_.

    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    return "primary_%s_%s_visible_%s" % (id, name, file_type)

def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
    """Serialise one dataset description as a single newline-terminated JSON line.

    The record carries ``type``, ``ext``, ``filename``, ``name`` and an empty
    ``metadata`` mapping.  ``dataset_id`` is stored under ``base_dataset_id``
    when ``primary`` is true and under ``dataset_id`` otherwise.

    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
    """
    record = {
        'type': ds_type,
        'ext': extesion,
        'filename': filename,
        'name': name,
        'metadata': {},
    }
    id_key = 'base_dataset_id' if primary else 'dataset_id'
    record[id_key] = dataset_id
    return json.dumps(record) + "\n"

def _split_truststore_url(raw_url):
    """Split the callback URL into (kms_url, short_url, username, password).

    The URL is percent-decoded and split on ";".  The first three fields are
    the short path URL, the user name and the password; the KMS base URL is
    the part of the short URL before the first "/short".

    Exits the process with status 5 when fewer than three fields are present
    or the first field does not contain "/short".
    """
    url_params = urlparse.unquote(raw_url).split(";")
    if len(url_params) < 3 or "/short" not in url_params[0]:
        print("The url we got back is malformed: " + raw_url)
        sys.exit(5)
    # NOTE(review): a ";" inside the password would be cut off here because
    # only the third field is used - confirm the sender never produces one.
    short_url, username, password = url_params[:3]
    return short_url.split("/short")[0], short_url, username, password


def _safe_filename(raw_name):
    """Return ``raw_name`` with every character outside SAFE_CHARS replaced by '-'."""
    return "".join(c if c in SAFE_CHARS else '-' for c in raw_name)


def main():
    """Download the TrustStore files named by a Galaxy data-source request.

    ``sys.argv[1]`` is the path of the JSON job description.  Its
    ``param_dict`` supplies the callback ``URL`` and ``__new_file_path__``,
    ``job_config`` supplies the metadata file path and the datatypes
    configuration, and the first ``output_data`` entry describes the primary
    output dataset.

    The first file found is written to the primary output's ``file_name``;
    every later file is written under ``__new_file_path__``.  One JSON
    metadata line is written per downloaded file.

    Exit codes: 2 store not found, 3 malformed path list, 4 damaged store,
    missing file or failed decompression, 5 malformed URL or failed
    authentication.
    """
    properties_file = sys.argv[1]
    with open(properties_file, 'r') as file_:
        all_params = json.load(file_)
    json_params = all_params.get("param_dict")
    metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]

    output_data = all_params.get('output_data')
    file_name, default_ext, hda_id, dataset_id = operator.itemgetter(
        'file_name', 'ext', 'hda_id', 'dataset_id')(output_data[0])
    extra_files_path = json_params['__new_file_path__']

    datatypes_registry = Registry()
    datatypes_registry.load_datatypes(
        root_dir=all_params['job_config']['GALAXY_ROOT_DIR'],
        config=all_params['job_config']['GALAXY_DATATYPES_CONF_FILE']
    )

    kms_url, short_url, username, password = _split_truststore_url(json_params['URL'])

    # Use the Galaxy scratch area when it exists, otherwise pass None.
    tmp_dir = '/mnt/galaxy/tmp'
    tmp = tmp_dir if os.path.exists(tmp_dir) else None

    config = TrustStoreClient.Config(
        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp)
    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
    try:
        truststore.authenticate(username, password)
    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
        print(err)
        sys.exit(5)
    truststore.getPrivateKey('privkey.pem')

    # The lengthened path is the store ID followed by the paths to fetch.
    path_texts = truststore.lengthenPath(short_url)
    if len(path_texts) < 2:
        print("The path we got was malformed: " + str(path_texts))
        sys.exit(3)
    store_id = path_texts[0]
    paths = path_texts[1:]

    store = truststore.getStoreByID(store_id)
    if store is None:
        print("Coudn't find store with that ID, or don't have access.")
        sys.exit(2)
    root = truststore.listDirectory(store)

    print("Preparing the following for downloading: " + str(paths))

    if root is None:
        print("Store is damaged or we don't have sufficient access.")
        sys.exit(4)

    # NOTE(review): every file, including the first, is reported with this
    # type and with primary=False - confirm that is what Galaxy expects for
    # the primary output.
    data_type = "new_primary_dataset"
    first = True

    with open(metadata_path, 'wb') as metadata_file:
        for path in paths:
            locations = utils.Navigation.files_at_path(root, path)
            if not locations:
                print("Path not found: " + path)
                print("In root: " + str(root))
                # Nothing to download; also avoids iterating a None result.
                continue
            print("Downloading file..." + ", ".join([loc.name for loc in locations]))
            for location in locations:
                filename = _safe_filename(location.name)
                extension = os.path.splitext(filename)[1].strip(".")
                name = construct_multi_filename(hda_id, filename, extension)
                if first:
                    # The first file fills the primary output dataset.
                    target_output_filename = file_name
                    first = False
                else:
                    target_output_filename = os.path.normpath(
                        os.path.join(extra_files_path, name))

                download = truststore.getFile(store, location)
                if download is None:
                    print("File %s not found." % location.name)
                    sys.exit(4)
                ungzip(download, target_output_filename)

                # Sniff the file actually written to disk; the sanitised name
                # alone is not a path to any data.  Each file starts from the
                # job's own extension hint so one file's detected type is not
                # forced onto the next.
                sniffed_ext = sniff.handle_uploaded_dataset_file(
                    target_output_filename, datatypes_registry, ext=default_ext)
                metadata_file.write(
                    metadata_to_json(dataset_id, target_output_filename, name,
                                     sniffed_ext, data_type))


# Run the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()