changeset 9:0f2a5664c9eb

Actually update downloader (probably not working).
author Catherine Wise <catherine.wise@csiro.au>
date Tue, 12 May 2015 11:35:04 +1000
parents 96847f227d20
children 16b0ceecf1f7
files TrustStoreGalaxyBrowse.py TrustStoreGalaxyBrowse.xml tool_dependencies.xml
diffstat 3 files changed, 130 insertions(+), 95 deletions(-) [+]
line wrap: on
line diff
--- a/TrustStoreGalaxyBrowse.py	Mon May 11 15:31:52 2015 +1000
+++ b/TrustStoreGalaxyBrowse.py	Tue May 12 11:35:04 2015 +1000
@@ -1,54 +1,65 @@
+"""TrustStore downloader for Galaxy."""
 from __future__ import division, absolute_import, print_function, unicode_literals
 import sys
 import shutil
 import gzip
 import tempfile
 import os
+import json
+import operator
 from py_ts import TrustStoreClient, utils
-# from galaxy.datatypes.checkers import *
+from galaxy.datatypes.checkers import util
 
-def printNice(elem, f, depth):
+CLIENT_KEY = "desktop"
+CLIENT_SECRET = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
+CHUNK_SIZE = 2**20 # 1 MiB
+SAFE_CHARS = '.-()[]0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ '
+
+def print_nice(elem, f, depth):
+    """Print the file name."""
     try:
         f.write('\t'*depth + elem.name + " (" + str(len(elem.fragments)) + " parts)\n")
     except AttributeError:
         f.write('\t'*depth + elem.name + "\n")
         for child in elem.children:
-            printNice(child, f, depth+1)
+            print_nice(child, f, depth+1)
 
 def check_gzip(file_path):
+    """Check if file is gzipped."""
     try:
-        temp = open( file_path, "U" )
-        magic_check = temp.read( 2 )
+        temp = open(file_path, "U")
+        magic_check = temp.read(2)
         temp.close()
         if magic_check != util.gzip_magic:
-            return ( False, False )
-    except:
-        return ( False, False )
-    return ( True, True )
+            return (False, False)
+    except Exception:
+        return (False, False)
+    return (True, True)
 
 def ungzip(download, outputFile):
+    """Uncompress file."""
     is_gzipped, is_valid = check_gzip(download)
 
     if is_gzipped and not is_valid:
-        print "File is compressed (gzip) but not valid."
+        print("File is compressed (gzip) but not valid.")
         sys.exit(4)
     elif is_gzipped and is_valid:
-        # We need to uncompress the temp_name file, but BAM files must remain compressed in the BGZF format
-        CHUNK_SIZE = 2**20 # 1Mb
-        fd, uncompressed = tempfile.mkstemp(prefix='data_id_upload_gunzip_', dir=os.path.dirname(outputFile), text=False )
+        # We need to uncompress the temp_name file, but BAM files must
+        # remain compressed in the BGZF format
+        file_handle, uncompressed = tempfile.mkstemp(prefix='data_id_upload_gunzip_', dir=os.path.dirname(outputFile), text=False )
         gzipped_file = gzip.GzipFile(download, 'rb')
         while 1:
             try:
                 chunk = gzipped_file.read(CHUNK_SIZE)
             except IOError:
-                os.close(fd)
+                os.close(file_handle)
                 os.remove(uncompressed)
-                print 'Problem decompressing gzipped data', dataset, json_file
+                print('Problem decompressing gzipped data %s %s' % (download, outputFile))
                 sys.exit(4)
             if not chunk:
                 break
-            os.write(fd, chunk)
-        os.close(fd)
+            os.write(file_handle, chunk)
+        os.close(file_handle)
         gzipped_file.close()
 
         shutil.copy(uncompressed, outputFile)
@@ -56,79 +67,103 @@
             os.remove(uncompressed)
             os.remove(download)
         except OSError:
-                pass
+            pass
     else:
         shutil.copy(download, outputFile)
 
-if __name__ == '__main__':
+def construct_multi_filename(id, name, file_type):
+    """ Implementation of *Number of Output datasets cannot be determined until
+    tool run* from documentation_.
+    .. _documentation: http://wiki.galaxyproject.org/Admin/Tools/Multiple%20Output%20Files
+    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
+    """
+    filename = "%s_%s_%s_%s_%s" % ('primary', id, name, 'visible', file_type)
+    return filename
 
-    kms_url = "https://tstest-kms.it.csiro.au/kmscolab_3_0"
-    ims_url = "https://tstest-ims.it.csiro.au/ims_3_0/services/IMS"
-    username = sys.argv[1]
-    password = sys.argv[2]
-    client_key = "desktop"
-    client_secret = "cpU92F1PT7VOCANjSknuCDp4DrubmujoBaF6b0miz8OpKNokEbGMHCaSFK5/lISbBmaaGVCgeADI2A39F3Hkeg=="
-    storename = sys.argv[3]
-    path = sys.argv[4]
-    fileType = sys.argv[5]
-    filename = ""
-    outputFile = ""
-    if len(sys.argv) > 9:
-        filename = sys.argv[6]
-        outputFile = sys.argv[7]
-        outputFileId = sys.argv[8]
-        otherFilesDir = sys.argv[9]
+def metadata_to_json(dataset_id, filename, name, extesion, ds_type='dataset', primary=False):
+    """ Return line separated JSON
+    From https://github.com/mdshw5/galaxy-json-data-source/blob/master/json_data_source.py
+    """
+    meta_dict = dict(type=ds_type,
+                     ext=extesion,
+                     filename=filename,
+                     name=name,
+                     metadata={})
+    if primary:
+        meta_dict['base_dataset_id'] = dataset_id
     else:
-        outputFile = sys.argv[6]
+        meta_dict['dataset_id'] = dataset_id
+    return "%s\n" % json.dumps(meta_dict)
 
-    config = TrustStoreClient.Config(ims_url, kms_url, client_key, client_secret, tmpDir='/mnt/galaxy/tmp')
-    ts = TrustStoreClient.TrustStoreClient(headless=False, config=config)
-    try:
-        ts.authenticate(username, password)
-    except TrustStoreClient.TrustStoreClientAuthenticationException as e:
-        print e
+def main():
+    properties_file = sys.argv[1]
+    ouput_root = sys.argv[2]
+    json_params = None
+    metadata_path = None
+    with open(properties_file, 'r') as file_:
+        all_params = json.loads(file_.read())
+        json_params = all_params.get("param_dict")
+        metadata_path = all_params["job_config"]["TOOL_PROVIDED_JOB_METADATA_FILE"]
+
+    output_filename = json_params.get('output', None)
+    output_data = json_params.get('output_data')
+    extra_files_path, file_name, ext, out_data_name, hda_id, dataset_id = \
+      operator.itemgetter('extra_files_path', 'file_name', 'ext', 'out_data_name', 'hda_id', 'dataset_id')(output_data[0])
+
+    url_params = json_params['url'].split(";")
+    if len(url_params) < 3:
+        print("The url we got back is malformed: "+ url_params)
         sys.exit(5)
-    ts.getPrivateKey('privkey.pem')
-    listing = ts.listStores()
-    found = False
-    for store in listing:
-        if store.friendly_name == storename:
-            found = True
-            root = ts.listDirectory(store)
-            location = None
-            if path != "/":
-                location = ts_utils.ts_utils.dirAtPath(root, path)
-                if not location:
-                    print "Path not found"
-                    sys.exit(3)
-            else:
-                location = root
-            if filename and filename != "":
-                outputFileList = [outputFile]
-                inputFileList = None
-                if "," in filename: # we have multiple files guys.
-                    inputFileList = filename.split(",")
-                    for inputFile in inputFileList[1:]: # First file will be sent to outputFile.
-                        outName = "%s_%s_%s_%s_%s" % ('primary', outputFileId, inputFile.replace(".","-"), 'visible', fileType)
-                        outputFileList.append(os.path.join(otherFilesDir, outName))
-                else:
-                    inputFileList = [filename]
-                for inFile, outFile in zip(inputFileList, outputFileList):
-                    downloadMe = ts_utils.ts_utils.recurseToChildNamed(location, inFile)
-                    if downloadMe:
-                        download = ts.getFile(store, downloadMe)
-                        ungzip(download, outFile)
-                    else:
-                        print "File %s not found" % inFile
-                        sys.exit(4)
-            else:
-                with open(outputFile, 'w+') as f:
-                    try:
-                        for child in root.children:
-                            printNice(child, f, 0)
-                    except AttributeError as e:
-                        print e
-                        print root
-    if not found:
-        print "Store %s not found" % storename
+    short_url = url_params[0]
+    username = url_params[1]
+    password = url_params[2]
+    if "/short" not in short_url:
+        print("The url we got back is malformed: " + url_params)
+        sys.exit(5)
+    kms_url = short_url.split("/short")[0]
+
+    config = TrustStoreClient.Config(
+        None, kms_url, CLIENT_KEY, CLIENT_SECRET, tmpDir='/mnt/galaxy/tmp')
+    truststore = TrustStoreClient.TrustStoreClient(headless=False, config=config)
+    try:
+        truststore.authenticate(username, password)
+    except TrustStoreClient.TrustStoreClientAuthenticationException as err:
+        print(err)
+        sys.exit(5)
+    truststore.getPrivateKey('privkey.pem')
+
+    path_texts = truststore.lengthenPath(short_url)
+    if len(path_texts) < 2:
+        print("The path we got was malformed.")
+        sys.exit(3)
+    paths = path_texts[1:]
+    store_id = path_texts[0]
+
+    store = truststore.getStoreByID(store_id)
+    if store is None:
+        print("Coudn't find store with that ID, or don't have access.")
         sys.exit(2)
+    root = truststore.listDirectory(store)
+
+    with open(metadata_path, 'wb') as metadata_file:
+        for path in paths:
+            locations = utils.Navigation.files_at_path(root, path)
+            for location in locations:
+                if not locations:
+                    print("Path not found: " + path)
+                    continue
+                filename = "".join(c in SAFE_CHARS and c or '-' for c in location.name)
+                extension = os.path.splitext(filename)[1]
+                name = construct_multi_filename(hda_id, filename, extension)
+                target_output_filename = os.path.normpath("/".join([ouput_root, name]))
+                metadata_file.write(
+                    metadata_to_json(dataset_id, filename, name, extension, target_output_filename))
+                download = truststore.getFile(store, location)
+                if download is None:
+                    print("File %s not found." % location.name)
+                    sys.exit(4)
+                ungzip(download, target_output_filename)
+
+
+if __name__ == '__main__':
+    main()
--- a/TrustStoreGalaxyBrowse.xml	Mon May 11 15:31:52 2015 +1000
+++ b/TrustStoreGalaxyBrowse.xml	Tue May 12 11:35:04 2015 +1000
@@ -1,14 +1,14 @@
 <tool id="ts_import_2" name="Browse Data from TrustStore" tool_type="data_source" version="0.5" force_history_refresh="True"> <!-- tool_type="data_source"> -->
     <description> secure cloud storage.</description>
     <stdio>
-        <exit_code range="2" err_level="fatal" description="Could not find store with supplied name." />
-        <exit_code range="3" err_level="fatal" description="Could not find supplied path in store." />
-        <exit_code range="4" err_level="fatal" description="Could not find supplied filename in store at path." />
+        <exit_code range="2" err_level="fatal" description="Could not find store with given ID, or don't have access rights." />
+        <exit_code range="3" err_level="fatal" description="Could not find path in store." />
+        <exit_code range="4" err_level="fatal" description="Could not find matching file in store." />
         <exit_code range="5" err_level="fatal" description="Bad credentials supplied. See stdout for more information." />
     </stdio>
-    <command interpreter="python">TrustStoreGalaxyImport.py $url</command>
-    <inputs action="https://tstest-kms.it.csiro.au/kmscolab_3_0/login" check_values="false" method="get">
-        <display>go to TrustStore server</display>
+    <command interpreter="python">TrustStoreGalaxyImport.py $output "."</command>
+    <inputs action="https://tstest-kms.it.csiro.au/kmscolab_3_0/login" check_values="false" method="get" target="_top">
+        <display>go to TrustStore server $GALAXY_URL $tool_id</display>
         <!--<param name="appCallbackUrl" type="baseurl" value="/tool_runner?tool_id=ts_import_2" />-->
         <param name="GALAXY_URL" type="baseurl" value="/tool_runner?tool_id=testtoolshed.g2.bx.psu.edu%2Frepos%2Fcathywise%2Ftruststore_browse_testing%2Fts_import_2%2F0.5" />
     </inputs>
@@ -16,13 +16,13 @@
         <request_param galaxy_name="url" remote_name="url" missing="" />
     </request_param_translation>
     <outputs>
-        <data name="output1" format="auto" />
+        <data name="output" format="auto" />
     </outputs>
     <uihints minwidth="800"/>
     <options sanitize="False" refresh="True"/>
 
     <requirements>
-        <requirement type="package" version="0.5">py_ts</requirement>
+        <requirement type="package" version="0.5.1">py_ts</requirement>
         <requirement type="python_module">py_ts</requirement>
         <requirement type="binary" version="1.0.0">openssl</requirement>
     </requirements>
--- a/tool_dependencies.xml	Mon May 11 15:31:52 2015 +1000
+++ b/tool_dependencies.xml	Tue May 12 11:35:04 2015 +1000
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="py_ts" version="0.5">
+    <package name="py_ts" version="0.5.1">
         <readme>
             I should write this.
         </readme>
@@ -15,7 +15,7 @@
 pycrypto
 psycopg2
                 </action>
-                <action type="download_file">https://tstest-kms.it.csiro.au/truststore/downloads/PythonTrustStore-0.5.tar.gz</action>
+                <action type="download_file">https://tstest-kms.it.csiro.au/truststore/downloads/PythonTrustStore-0.5.1.tar.gz</action>
                 <action type="shell_command">.  $INSTALL_DIR/venv/bin/activate;
 $INSTALL_DIR/venv/bin/pip install --install-option="--home=$INSTALL_DIR" --install-option="--install-scripts=$INSTALL_DIR/bin" PythonTrustStore-0.5.tar.gz</action>
                 <action type="make_directory">$INSTALL_DIR/lib/python</action>