Mercurial > repos > cathywise > truststore_browse
view TrustStoreGalaxyBrowse.py @ 23:194cf7e7e936 default tip
Format hacks yo.
| author | Wise, Catherine (Digital, Acton) <Catherine.Wise@csiro.au> | 
|---|---|
| date | Thu, 25 Jun 2015 14:06:10 +1000 | 
| parents | fb915c3aefb2 | 
| children | 
line wrap: on
 line source
"""TrustStore downloaded for Galaxy.""" from __future__ import division, absolute_import, print_function, unicode_literals import sys import shutil import gzip import tempfile import os import json import operator import urlparse from py_ts import TrustStoreClient, utils # import galaxy.model # need to import model before sniff to resolve a circular import dependency from galaxy.datatypes.checkers import util # from galaxy.datatypes import sniff # from galaxy.datatypes.registry import Registry # Tell urllib3 to use pyOpenSSL because we are on old Python stdlib. # import urllib3.contrib.pyopenssl # urllib3.contrib.pyopenssl.inject_into_urllib3() # os.environ["REQUESTS_CA_BUNDLE"] = "/etc/ssl/certs/ca-certificates.crt" CLIENT_KEY = "desktop" CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg==" CHUNK_SIZE = 2**20 # 1Mb SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ ' def print_nice(elem, f, depth): """Print the file name.""" try: f.write('\t'*depth + elem.name + " (" + str(len(elem.fragments)) + " parts)\n") except AttributeError: f.write('\t'*depth + elem.name + "\n") for child in elem.children: print_nice(child, f, depth+1) def check_gzip(file_path): """Check if file is gziped.""" try: temp = open(file_path, "U") magic_check = temp.read(2) temp.close() if magic_check != util.gzip_magic: return (False, False) except Exception: return (False, False) return (True, True) def ungzip(download, outputFile): """Uncompress file.""" is_gzipped, is_valid = check_gzip(download) if is_gzipped and not is_valid: print("File is compressed (gzip) but not valid.") sys.exit(4) elif is_gzipped and is_valid: # We need to uncompress the temp_name file, but BAM files must # remain compressed in the BGZF format file_handle, uncompressed = tempfile.mkstemp(prefix='data_id_upload_gunzip_', dir=os.path.dirname(outputFile), text=False ) gzipped_file = gzip.GzipFile(download, 'rb') while 1: try: chunk = gzipped_file.read(CHUNK_SIZE) except IOError: os.close(file_handle) os.remove(uncompressed) print('Problem decompressing gzipped data %s %s' % (download, outputFile)) sys.exit(4) if not chunk: break os.write(file_handle, chunk) os.close(file_handle) gzipped_file.close() try: shutil.copy(uncompressed, outputFile) except shutil.Error: pass # If the file is already in the right location, move along. try: os.remove(uncompressed) os.remove(download) except OSError: pass else: try: shutil.copy(download, outputFile) except shutil.Error: pass # If the file is already in the right location, move along. def construct_multi_filename(id, name, file_type): """ Implementation of *Number of Output datasets cannot be determined until tool run* from documentation_. .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py """ filename = "%s_%s_%s_%s_%s" % ('primary', id, name, 'visible', file_type) return filename def metadata_to_json(dataset_id, filename, name, extension, ds_type='dataset', primary=False): """ Return line separated JSON From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py """ ext = extension if ext == 'fa': ext = 'fasta' meta_dict = dict(type=ds_type, ext=ext, filename=filename, name=name, metadata={}) if primary: meta_dict['base_dataset_id'] = dataset_id else: meta_dict['dataset_id'] = dataset_id return "%s\n" % json.dumps(meta_dict) def main(): properties_file = sys.argv[1] json_params = None metadata_path = None all_params = None with open(properties_file, 'r') as file_: settings = file_.read() all_params = json.loads(settings) json_params = all_params.get("param_dict") metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"] output_filename = json_params.get('output', None) output_data = all_params.get('output_data') extra_files_path, file_name, ext, out_data_name, hda_id, dataset_id = \ operator.itemgetter('extra_files_path', 'file_name', 'ext', 'out_data_name', 'hda_id', 'dataset_id')(output_data[0]) extra_files_path = json_params['__new_file_path__'] # datatypes_registry = Registry() # datatypes_registry.load_datatypes( # root_dir=all_params['job_config']['GALAXY_ROOT_DIR'], # config=all_params['job_config']['GALAXY_DATATYPES_CONF_FILE'] # ) url_params = urlparse.unquote(json_params['URL']).split(";") if len(url_params) < 3: print("The url we got back is malformed: "+ json_params['URL']) sys.exit(5) short_url = url_params[0] username = url_params[1] password = url_params[2] if "/short" not in short_url: print("The url we got back is malformed: " + json_params['URL']) sys.exit(5) kms_url = short_url.split("/short")[0] tmp_dir = '/mnt/galaxy/tmp' tmp = None if os.path.exists(tmp_dir): tmp = tmp_dir config = TrustStoreClient.Config( None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir=tmp) truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config) try: truststore.authenticate(username, password) except TrustStoreClient.TrustStoreClientAuthenticationException as err: print(err) sys.exit(5) truststore.getPrivateKey('privkey.pem') path_texts = truststore.lengthenPath(short_url) if len(path_texts) < 2: print("The path we got was malformed: " + str(path_texts)) sys.exit(3) paths = path_texts[1:] store_id = path_texts[0] store = truststore.getStoreByID(store_id) if store is None: print("Coudn't find store with that ID, or don't have access.") sys.exit(2) root = truststore.listDirectory(store) first = True print("Preparing the following for downloading: " + str(paths)) # Empty parameter file. open(file_name, 'w' ).close() if root is not None: with open(metadata_path, 'wb') as metadata_file: for path in paths: locations = utils.Navigation.files_at_path(root, path) if not locations or locations == []: print("Path not found: " + path) print("In root: " + str(root)) else: print("Downloading file..." + ", ".join([loc.name for loc in locations])) for location in locations: filename = "".join(c in SAFE_CHARS and c or '-' for c in location.name) extension = os.path.splitext(filename)[1].strip(".") name = construct_multi_filename(hda_id, filename, extension) target_output_filename = None data_type = "new_primary_dataset" target_output_filename = os.path.normpath(os.path.join(extra_files_path, name)) download = truststore.getFile(store, location) primary = not first if download is None: print("File %s not found." % location.name) sys.exit(4) if first: first = False target_output_filename = file_name data_type = "dataset" ungzip(download, target_output_filename) metadata_file.write( metadata_to_json(dataset_id, target_output_filename, name, extension, data_type, primary=primary)) # ext = sniff.handle_uploaded_dataset_file(target_output_filename, datatypes_registry, ext=ext) # print("Guessed file type: " + ext) else: print("Store is damaged or we don't have sufficient access.") sys.exit(4) if __name__ == '__main__': main()
