changeset 0:d2c51cdc2172 draft

Uploaded
author trinity_ctat
date Tue, 01 May 2018 12:36:56 -0400
parents
children fbe2227fe0d6
files data_manager/add_ctat_resource_lib.py data_manager/add_ctat_resource_lib.xml data_manager_conf.xml tool-data/ctat_genome_resource_libs.loc.sample tool_data_table_conf.xml.sample
diffstat 5 files changed, 649 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/add_ctat_resource_lib.py	Tue May 01 12:36:56 2018 -0400
@@ -0,0 +1,481 @@
+#!/usr/bin/env python
+# ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
+
+# Rewritten by H.E. Cicada Brokaw Dennis from a source downloaded from the toolshed and
+# other example code on the web.
+# This now allows downloading of a user selected library
+# but only from the CTAT Genome Resource Library website.
+# Ultimately we might want to allow the user to specify any location 
+# from which to download.
+# Users can create or download other libraries and use this tool to add them if they don't want
+# to add them by hand.
+
+import argparse
+import os
+#import tarfile
+#import urllib
+import subprocess
+
+# Comment out the following line when testing without galaxy package.
+from galaxy.util.json import to_json_string
+# The following is not being used, but leaving as info
+# in case we ever want to get input values using json.
+# from galaxy.util.json import from_json_string
+
+# datetime.now() is used to create the unique_id
+from datetime import datetime
+
+# The FileListParser is used by get_ctat_genome_urls(),
+# which is called by the Data Manager interface (.xml file) to get
+# the filenames that are available online at broadinstitute.org
+# Not sure best way to do it. 
+# This object uses HTMLParser to look through the html 
+# searching for the filenames within anchor tags.
+import urllib2
+from HTMLParser import HTMLParser
+
+# Directory listing that is scanned for the downloadable library archives.
+_CTAT_ResourceLib_URL = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/'
+# Name given to the build directory when the extracted download does not
+# consist of exactly one entry.
+_CTAT_BuildDir_Name = 'ctat_genome_lib_build_dir'
+# Prefix put in front of every display name written to the data table.
+_CTAT_ResourceLib_DisplayNamePrefix = 'CTAT_GenomeResourceLib_'
+# Genome name used when neither a display name nor a source file name is available.
+_CTAT_ResourceLib_DefaultGenome = 'Unspecified_Genome'
+# Minimum number of bytes that must be available on the destination device
+# before a download is attempted.
+_NumBytesNeededForBuild = 64424509440 # 60 Gigabytes. FIX - This might not be correct.
+# Scratch file that is written and removed to check the destination is writable.
+_Download_TestFile = "write_testfile.txt"
+# Marker file created in the destination after a download has been extracted.
+_DownloadSuccessFile = 'download_succeeded.txt'
+
+class FileListParser(HTMLParser):
+    """Collect the href values of anchor tags that refer to tar.gz archives.
+
+    After feed() has been called, self.urls is the set of href values that
+    contain "tar.gz" and do not contain "md5".
+    """
+    def __init__(self):
+        # Have to use direct call to super class rather than using super():
+        # super(FileListParser, self).__init__()
+        # because HTMLParser is an "old style" class and its inheritance chain does not include object.
+        HTMLParser.__init__(self)
+        self.urls = set()
+    def handle_starttag(self, tag, attrs):
+        # Look for filename references in anchor tags and add them to urls.
+        if tag != "a":
+            return
+        for name, value in attrs:
+            # HTMLParser reports an attribute without a value (e.g. <a href>)
+            # as None, which cannot be searched with "in", so skip those.
+            if name != "href" or value is None:
+                continue
+            # Does the href have a tar.gz in it (and is it not an md5 file)?
+            if ("tar.gz" in value) and ("md5" not in value):
+                self.urls.add(value)
+# End of class FileListParser
+
+def get_ctat_genome_urls():
+    """Return the dynamic select options for the archives listed at _CTAT_ResourceLib_URL.
+
+    The page is fetched and parsed with FileListParser. The result is a list of
+    3-item tuples, as needed for dynamic options:
+        item one is the string displayed in the option list (the file name),
+        item two is the value put into the parameter (the href that was found),
+        item three is True for the option that is selected, else False.
+    Entries whose file name starts with "Mouse_" are left out.
+    """
+    # open the url and retrieve the urls of the files in the directory.
+    resource = urllib2.urlopen(_CTAT_ResourceLib_URL)
+    try:
+        theHTML = resource.read()
+    finally:
+        # Release the connection even if the read fails.
+        resource.close()
+    filelist_parser = FileListParser()
+    filelist_parser.feed(theHTML)
+    options = []
+    # The parser keeps the urls in a set, which has no defined order.
+    # Sort them so the option list is the same on every call.
+    for url in sorted(filelist_parser.urls):
+        # The urls look like:
+        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
+        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
+        filename = url.split("/")[-1]
+        if filename.split("_")[0] != "Mouse":
+            # Take out the mouse genome options for now.
+            # The mouse genome option is not handled correctly yet
+            # Select the first option that is actually kept, so that a
+            # filtered out entry can never be the "selected" one.
+            options.append((filename, url, not options))
+    return options
+
+# The following was used by the example program to get input parameters through the json.
+# Just leaving here for reference.
+# We are getting all of our parameter values through command line arguments.
+#def get_reference_id_name(params):
+#    genome_id = params['param_dict']['genome_id']
+#    genome_name = params['param_dict']['genome_name']
+#    return genome_id, genome_name
+#
+#def get_url(params):
+#    trained_url = params['param_dict']['trained_url']
+#    return trained_url
+
+def download_from_BroadInst(source, destination, force_download):
+    """Download a library archive with curl and extract it into destination.
+
+    Input Parameters
+    source is the full URL of the file we want to download.
+        It should look something like:
+        https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
+        It is inserted as is into a shell command line.
+    destination is the location where the source file will be unarchived.
+        Relative paths are expanded using the current working directory, so within Galaxy,
+        it is best to send in absolute fully specified path names so you know to where
+        the source file going to be extracted.
+    force_download will cause a new download and extraction to occur, even if the destination
+        has a file in it indicating that a previous download succeeded.
+
+    Returns the following:
+    (downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded)
+    downloaded_directory
+        The directory which was created as a subdirectory of the destination directory
+        when the download occurred, or the directory of that name that was already there.
+    download_has_source_data
+        Is a boolean indicating whether the source file was "source_data" or was "plug-n-play".
+    genome_build_directory
+        The directory where the genome resource library is or where it should be built.
+        It is a subdirectory of the downloaded directory.
+    lib_was_downloaded
+        Since it doesn't always do the download, the function returns whether download occurred.
+
+    Raises ValueError if destination exists but is not a directory, or if the extracted
+    directory cannot be found. Raises OSError if the destination cannot be created or
+    there is not enough free space. Raises IOError if the destination cannot be written
+    into. Raises subprocess.CalledProcessError if the download command fails.
+    """
+    lib_was_downloaded = False
+
+    # Get the root filename of the Genome Directory.
+    src_filename = source.split("/")[-1]
+    filename_parts = src_filename.split(".")
+    root_genome_dirname = filename_parts[0]
+    # If the src_filename indicates it is a source file, as opposed to plug-n-play,
+    # then we may need to do some post processing on it.
+    # A name without any "." has no type part; treat it as not being source_data.
+    type_of_download = filename_parts[1] if len(filename_parts) > 1 else ""
+    download_has_source_data = (type_of_download == "source_data")
+
+    # We want to make sure that destination is absolute fully specified path.
+    canonical_destination = os.path.realpath(destination)
+    if os.path.exists(canonical_destination):
+        if not os.path.isdir(canonical_destination):
+            raise ValueError("The destination is not a directory: " + \
+                             "{:s}".format(canonical_destination))
+        # else all is good. It is a directory.
+    else:
+        # We need to create it.
+        try:
+            os.makedirs(canonical_destination)
+        except os.error:
+            print "ERROR: Trying to create the following directory path:"
+            print "\t{:s}".format(canonical_destination)
+            raise
+
+    # Make sure the directory now exists and we can write to it.
+    if not os.path.exists(canonical_destination):
+        # It should have been created, but if it doesn't exist at this point
+        # in the code, something is wrong. Raise an error.
+        raise OSError("The destination directory could not be created: " + \
+                      "{:s}".format(canonical_destination))
+    test_writing_file = os.path.join(canonical_destination, _Download_TestFile)
+    try:
+        # The with statement closes the file even when the write fails.
+        with open(test_writing_file, "w") as filehandle:
+            filehandle.write("Testing writing to this file.")
+        os.remove(test_writing_file)
+    except IOError:
+        print "The destination directory could not be written into: " + \
+                      "{:s}".format(canonical_destination)
+        raise
+
+    # Get the list of files in the directory,
+    # We use it to check for a previous download or extraction among other things.
+    orig_files_in_destdir = set(os.listdir(canonical_destination))
+    # See whether the file has been downloaded already.
+    download_success_file_path = os.path.join(canonical_destination, _DownloadSuccessFile)
+    if ((_DownloadSuccessFile not in orig_files_in_destdir)
+        or (root_genome_dirname not in orig_files_in_destdir)
+        or force_download):
+        # Check whether there is enough space on the device for the library.
+        statvfs = os.statvfs(canonical_destination)
+        # f_bavail is the number of free blocks that ordinary users
+        # are allowed to use (excl. reserved space).
+        num_avail_bytes = statvfs.f_frsize * statvfs.f_bavail
+        if num_avail_bytes < _NumBytesNeededForBuild:
+            raise OSError("There is insufficient space ({:s} bytes)".format(str(num_avail_bytes)) + \
+                          " on the device of the destination directory: " + \
+                          "{:s}".format(canonical_destination))
+
+        if _DownloadSuccessFile in orig_files_in_destdir:
+            # Since we are redoing the download,
+            # the success file needs to be removed
+            # until the download has succeeded.
+            os.remove(download_success_file_path)
+        # We want to transfer and untar the file without storing the tar file, because that
+        # adds all that much more space to the needed amount of free space on the disk.
+        # Use subprocess to pipe the output of curl into tar.
+        command = "curl {:s} | tar -xzvf - -C {:s}".format(source, canonical_destination)
+        try: # to send the command that downloads and extracts the file.
+            # check_output keeps the verbose tar listing out of our own output.
+            # FIX - not sure check_output is what we want to use. If we want to have an error raised on
+            # any problem, maybe we should not be checking output.
+            subprocess.check_output(command, shell=True)
+        except subprocess.CalledProcessError:
+            print "ERROR: Trying to run the following command:\n\t{:s}".format(command)
+            raise
+        lib_was_downloaded = True
+
+    # Some code to help us if errors occur.
+    # These listings are only a debugging aid, so their exit status is ignored:
+    # ls exits non-zero when a glob matches nothing, which must not abort the run.
+    print "\n*******************************\nFinished download and extraction."
+    subprocess.call("ls -lad {:s}/*".format(canonical_destination), shell=True)
+    subprocess.call("ls -lad {:s}/*/*".format(canonical_destination), shell=True)
+
+    newfiles_in_destdir = set(os.listdir(canonical_destination)) - orig_files_in_destdir
+    if root_genome_dirname not in newfiles_in_destdir:
+        # Perhaps it has a different name than what we expected it to be.
+        # It will be the file that was not in the directory
+        # before we did the download and extraction.
+        found_filename = None
+        if len(newfiles_in_destdir) == 1:
+            # A set cannot be indexed, so take its only element through an iterator.
+            found_filename = next(iter(newfiles_in_destdir))
+        else:
+            for filename in newfiles_in_destdir:
+                # In most cases, there will only be one new file, but some OS's might have created
+                # other files in the directory.
+                # Look for the directory that was downloaded and extracted.
+                # The correct file's name should be a substring of the tar file that was downloaded.
+                if filename in src_filename:
+                    found_filename = filename
+        if found_filename is not None:
+            root_genome_dirname = found_filename
+
+    downloaded_directory = os.path.join(canonical_destination, root_genome_dirname)
+    if not os.path.exists(downloaded_directory):
+        raise ValueError("ERROR: Could not find the extracted file in the destination directory:" + \
+                             "\n\t{:s}".format(canonical_destination))
+
+    try:
+        # Create a file to indicate that the download succeeded,
+        # or refresh its timestamp if it is already there.
+        with open(download_success_file_path, "a"):
+            pass
+        os.utime(download_success_file_path, None)
+    except (IOError, OSError):
+        print "The download_success file could not be created: " + \
+                  "{:s}".format(download_success_file_path)
+        raise
+
+    # Look for the build directory, or specify the path where it should be placed.
+    downloaded_entries = os.listdir(downloaded_directory)
+    if len(downloaded_entries) == 1:
+        # Then that one entry is taken to be the build directory.
+        genome_build_directory = os.path.join(downloaded_directory, downloaded_entries[0])
+    else:
+        genome_build_directory = os.path.join(downloaded_directory, _CTAT_BuildDir_Name)
+
+    return (downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded)
+        
+def gmap_the_library(genome_build_directory):
+    """Run gmap_build on ref_genome.fa inside genome_build_directory.
+
+    This is the processing that needs to happen for gmap-fusion to work.
+    genome_build_directory should normally be a fully specified path,
+    though it should work if it is relative.
+
+    Whether or not the command succeeds, the contents of the build directory
+    are listed two levels deep to help with debugging.
+    Raises subprocess.CalledProcessError if gmap_build exits with an error.
+    """
+    command = "gmap_build -D {:s}/ -d ref_genome.fa.gmap -k 13 {:s}/ref_genome.fa".format( \
+              genome_build_directory, genome_build_directory)
+    try: # to send the gmap_build command.
+        subprocess.check_output(command, shell=True)
+    except subprocess.CalledProcessError:
+        print "ERROR: While trying to run the gmap_build command on the library:\n\t{:s}".format(command)
+        raise
+    finally:
+        # Some code to help us if errors occur.
+        # The listings use subprocess.call, which ignores the exit status of ls,
+        # so a failing listing cannot replace the error raised by gmap_build.
+        print "\n*******************************\nAfter running gmap_build."
+        if os.path.exists(genome_build_directory):
+            print "\nBuild Directory {:s}:".format(genome_build_directory)
+            subprocess.call("ls -la {:s}".format(genome_build_directory), shell=True)
+            for entry in os.listdir(genome_build_directory):
+                entry_path = "{:s}/{:s}".format(genome_build_directory, entry)
+                print "\nDirectory {:s}:".format(entry_path)
+                subprocess.call("ls -la {:s}".format(entry_path), shell=True)
+                if os.path.isdir(entry_path):
+                    for subdir_entry in os.listdir(entry_path):
+                        subdir_entry_path = "{:s}/{:s}".format(entry_path, subdir_entry)
+                        print "\nDirectory {:s}:".format(subdir_entry_path)
+                        subprocess.call("ls -la {:s}".format(subdir_entry_path), shell=True)
+        else:
+            print "Genome Build Directory does not exist:\n\t{:s}".format(genome_build_directory)
+        print "*******************************"
+
+def _print_directory_listing(label, directory, include_subdirs):
+    # Debugging aid: print "ls -la" of directory and of each of its entries,
+    # and, when include_subdirs is set, of the entries one level further down.
+    # label is "Source" or "Build" and only affects the messages.
+    # subprocess.call ignores the exit status of ls, so a failing listing
+    # cannot hide an error raised by the command that is being debugged.
+    if not os.path.exists(directory):
+        print "Genome {:s} Directory does not exist:\n\t{:s}".format(label, directory)
+        return
+    print "\n{:s} Directory {:s}:".format(label, directory)
+    subprocess.call("ls -la {:s}".format(directory), shell=True)
+    for entry in os.listdir(directory):
+        entry_path = "{:s}/{:s}".format(directory, entry)
+        print "\nDirectory {:s}:".format(entry_path)
+        subprocess.call("ls -la {:s}".format(entry_path), shell=True)
+        if include_subdirs and os.path.isdir(entry_path):
+            for subdir_entry in os.listdir(entry_path):
+                subdir_entry_path = "{:s}/{:s}".format(entry_path, subdir_entry)
+                print "\nDirectory {:s}:".format(subdir_entry_path)
+                subprocess.call("ls -la {:s}".format(subdir_entry_path), shell=True)
+
+def build_the_library(genome_source_directory, genome_build_directory, build, gmap_build):
+    """ Build the library with prep_genome_lib.pl, or only run gmap_build on it.
+
+        genome_source_directory is the location of the source_data needed to build the library.
+            Normally it is fully specified, but could be relative.
+            An empty string or None means there is no source data.
+        genome_build_directory is the location where the library will be built.
+            It can be relative to the current working directory or an absolute path.
+        build specifies whether to run prep_genome_lib.pl even if it was run before.
+        gmap_build specifies whether to run gmap_build or not.
+
+        When there is source data and build is set, the current working directory is
+        changed to genome_source_directory and prep_genome_lib.pl is run there
+        (with --gmap_build added if gmap_build is set).
+        Otherwise, if gmap_build is set, only gmap_the_library() is called.
+
+        Raises ValueError if a build is requested and genome_source_directory does not exist.
+        Raises subprocess.CalledProcessError if prep_genome_lib.pl exits with an error.
+
+        Following was the old way to do it. Before FusionFilter 0.5.0.
+        prep_genome_lib.pl
+           --genome_fa ref_genome.fa
+           --gtf ref_annot.gtf
+           --blast_pairs blast_pairs.gene_syms.outfmt6.gz
+           --fusion_annot_lib fusion_lib.dat.gz
+           --output_dir ctat_genome_lib_build_dir
+        index_pfam_domain_info.pl
+            --pfam_domains PFAM.domtblout.dat.gz
+            --genome_lib_dir ctat_genome_lib_build_dir
+        gmap_build -D ctat_genome_lib_build_dir -d ref_genome.fa.gmap -k 13 ctat_genome_lib_build_dir/ref_genome.fa
+    """
+    # "not genome_source_directory" covers both "" and None, so a missing
+    # source directory never reaches os.path.exists(), which rejects None.
+    if not (genome_source_directory and build):
+        if gmap_build:
+            gmap_the_library(genome_build_directory)
+        return
+
+    if not os.path.exists(genome_source_directory):
+        raise ValueError("Cannot build the CTAT Genome Resource Library. " + \
+            "The source directory does not exist:\n\t{:s}".format(genome_source_directory))
+
+    # The input files are named relative to the source directory.
+    os.chdir(genome_source_directory)
+    # FIX - look for a fusion_annot_lib and include it, else omit it.
+    command = "prep_genome_lib.pl --genome_fa ref_genome.fa --gtf ref_annot.gtf " + \
+              "--fusion_annot_lib CTAT_HumanFusionLib.v0.1.0.dat.gz " + \
+              "--annot_filter_rule AnnotFilterRule.pm " + \
+              "--pfam_db PFAM.domtblout.dat.gz " + \
+              "--output_dir {:s} ".format(genome_build_directory)
+    if gmap_build:
+        command += "--gmap_build "
+    try: # to send the prep_genome_lib command.
+        subprocess.check_call(command, shell=True)
+    except subprocess.CalledProcessError:
+        print "ERROR: While trying to run the prep_genome_lib.pl command " + \
+            "on the CTAT Genome Resource Library:\n\t{:s}".format(command)
+        raise
+    finally:
+        # Some code to help us if errors occur.
+        print "*******************************"
+        _print_directory_listing("Source", genome_source_directory, False)
+        _print_directory_listing("Build", genome_build_directory, True)
+        print "*******************************"
+
+def main():
+    """Parse the command line, get and/or build the library, and write the data table entry.
+
+    If a source url is given, the library is downloaded with download_from_BroadInst(),
+    otherwise the destination path must already exist and is taken to be the library.
+    Depending on the arguments the library is then built and/or gmapped.
+    Finally a dictionary with one entry (value, name, path) for the
+    ctat_genome_resource_libs data table is written as json to the output file.
+
+    Raises ValueError if no source url is given and the destination path does not
+    exist, or if a build is requested without a source url.
+    """
+    #Parse Command Line
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-s', '--source_url', default="", \
+        help='This is the url of a file with the data. They come from https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.')
+    parser.add_argument('-n', '--display_name', default="", \
+        help='Is used as the display name for the entry of this Genome Resource Library in the data table.')
+    # The destination and the output file are needed on every code path,
+    # so let argparse report it right away if one of them is missing.
+    parser.add_argument('-p', '--destination_path', required=True, \
+        help='Full path of the CTAT Resource Library location or destination, either where it is, or where it will be placed.')
+    parser.add_argument('-o', '--output_filename', required=True, \
+        help='Name of the output file, where the json dictionary will be written.')
+    parser.add_argument('-f', '--force_download',
+        help='Forces download of the Genome Resource Library, even if previously downloaded.', action="store_true")
+    parser.add_argument('-b', '--build',
+        help='Forces build/rebuild the Genome Resource Library, even if previously built. ' + \
+             'Must have downloaded source_data for this to work.', action="store_true")
+    parser.add_argument('-m', '--gmap_build',
+        help='Must be selected if you want the library to be gmapped. ' + \
+             'Will force gmap_build of the Genome Resource Library, even if previously gmapped.', action="store_true")
+    args = parser.parse_args()
+
+    # All of the input parameters are written by default to the output file prior to
+    # this program being called.
+    # But I do not get input values from the json file, but rather from command line.
+    # Just leaving the following code as a comment, in case it might be useful to someone later.
+    # params = from_json_string(open(filename).read())
+    # target_directory = params['output_data'][0]['extra_files_path']
+    # os.mkdir(target_directory)
+
+    # FIX - not sure the lib_was_downloaded actually serves a purpose...
+    lib_was_downloaded = False
+    download_has_source_data = False
+    # If we do not download the directory, the destination_path should be the
+    # location of the genome resource library.
+    downloaded_directory = None
+    # FIX - look inside of the args.destination_path to see if the build directory is inside it or is it.
+    genome_build_directory = None
+    # Stays None when there is no source url, so the checks further down
+    # can test it without it being undefined.
+    source_filename_root = None
+    # FIX - need to make sure we are handling all "possible" combinations of arguments.
+    # Probably would be good if we could simplify/remove some of them.
+    if args.source_url:
+        (downloaded_directory, download_has_source_data,
+         genome_build_directory, lib_was_downloaded) = \
+            download_from_BroadInst(source=args.source_url, \
+                                    destination=args.destination_path, \
+                                    force_download=args.force_download)
+        # Get the name out of the source's filename.
+        source_filename_root = args.source_url.split("/")[-1].split(".")[0]
+    else:
+        genome_build_directory = args.destination_path
+        if not os.path.exists(genome_build_directory):
+            raise ValueError("Cannot find the CTAT Genome Resource Library. " + \
+                "The directory does not exist:\n\t{:s}".format(genome_build_directory))
+        # else:
+        # FIX - Check if there is an actual CTAT Genome Resource Lib there.
+        #    _CTAT_BuildDir_Name
+
+    print "\nThe location of the CTAT Genome Resource Library is {:s}.\n".format(genome_build_directory)
+
+    # Take out builds for testing.
+    # FIX - We should leave a file indicating build success the same way we do for download success.
+    if download_has_source_data or args.build:
+        if downloaded_directory is None:
+            # Only reached when --build is given without a source url.
+            raise ValueError("Cannot build the CTAT Genome Resource Library. " + \
+                "A build needs downloaded source_data, so a source url must be given.")
+        build_the_library(downloaded_directory, genome_build_directory, args.build, args.gmap_build)
+    elif args.gmap_build:
+        gmap_the_library(genome_build_directory)
+
+    # Determine the display_name for the library.
+    if not args.display_name:
+        if source_filename_root:
+            # Get the name out of the source filename.
+            display_name = _CTAT_ResourceLib_DisplayNamePrefix + source_filename_root
+        else:
+            display_name = _CTAT_ResourceLib_DisplayNamePrefix + _CTAT_ResourceLib_DefaultGenome
+            print "WARNING: We do not have a genome name. Using a default name, that might not be correct."
+    else:
+        display_name = _CTAT_ResourceLib_DisplayNamePrefix + args.display_name
+    display_name = display_name.replace(" ","_")
+    print "The Genome Name will be set to: {:s}\n".format(display_name)
+
+    # Create a unique_id for the library.
+    # Every variant gets the time stamp appended, that is what makes the id unique.
+    datetime_stamp = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S_%f")
+    if source_filename_root:
+        unique_id = source_filename_root + datetime_stamp
+    elif downloaded_directory:
+        unique_id = os.path.basename(downloaded_directory).split(".")[0] + datetime_stamp
+    else:
+        unique_id = _CTAT_ResourceLib_DefaultGenome + datetime_stamp
+
+    print "The Resource Lib's display_name will be set to: {:s}\n".format(display_name)
+    print "Its unique_id will be set to: {:s}\n".format(unique_id)
+    print "Its dir_path will be set to: {:s}\n".format(genome_build_directory)
+
+    data_table_entry = dict(value=unique_id, name=display_name, path=genome_build_directory)
+    data_manager_dict = {'data_tables': {'ctat_genome_resource_libs': [data_table_entry]}}
+
+    # Temporarily the output file's dictionary is written for debugging:
+    print "The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict))
+    # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager,
+    # which then puts it into the correct .loc file (I think).
+    # Comment out the following lines when testing without galaxy package.
+    with open(args.output_filename, 'wb') as output_file:
+        output_file.write(to_json_string(data_manager_dict))
+
+# Only run main() when this file is executed as a script. The tool XML also
+# names this file in a <code> element (for get_ctat_genome_urls()), and
+# loading it that way must not start a download.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/add_ctat_resource_lib.xml	Tue May 01 12:36:56 2018 -0400
@@ -0,0 +1,109 @@
+<tool id="ctat_genome_resource_lib_data_manager" 
+    name="CTAT Genome Resource Library Data Manager" 
+    version="1.0.0" tool_type="manage_data">
+    <description>Retrieve, and/or specify the location of, a CTAT Genome Resource Library. 
+    </description>
+    <requirements>
+        <requirement type="package" version="2.7">python</requirement>
+        <requirement type="package" version="0.5.0">fusion-filter</requirement>
+        <!-- gmap-fusion used to be required in order to process downloaded libraries 
+             to create all of the required files and indexes. It includes gmap
+             and FusionFilter, programs from both of which are needed.
+             Now there is a bioconda FusionFilter recipe. Lets try using that instead.
+        <requirement type="package" version="0.3.0">gmap-fusion</requirement>
+        -->
+    </requirements>
+    <command detect_errors="default">
+        <![CDATA[
+        python $__tool_directory__/add_ctat_resource_lib.py 
+            --display_name "${display_name}" 
+            --destination_path "${destination}" 
+            --output_filename "${out_file}" 
+            #if str( $download_question.download ) == "true":
+                --source_url "\"${download_question.source_url}\"" 
+                #if str( ${download_question.force_download} ) == "true":
+                    --force_download
+                #end if
+            #end if
+            #if str( ${rebuild} ) == "true":
+                --build 
+            #end if
+            #if str( ${gmap_build} ) == "true":
+                --gmap_build 
+            #end if
+        ]]>
+    </command>
+    <inputs>
+        <!-- The following are left in here, just as examples of various ways of doing options.
+            <param name="force_download" type="boolean" checked="false"
+                truevalue="- -force_download" falsevalue="" label="Force New Download? (yes/no)" />
+            <param name="download" type="select" label="Need to Download?">
+                <option value="single" selected="true">Single Dataset</option>
+                <option value="paired_collection">Paired Collection</option>
+            <when value="paired_collection">
+                 <param name="fastq_input" format="fastqsanger" type="data_collection" collection_type="paired" label="Select dataset pair" help="Specify paired dataset collection containing paired reads"/>
+            </when>
+        -->
+        <conditional name="download_question">
+            <param name="download" type="boolean" checked="false" label="Need to Download?">
+            </param>
+            <when value="true">
+                <!-- The use of a code block to get dynamic options is now deprecated and discouraged.
+                     I am still using it here. The only other way I can think of to do this is to
+                     create another data_manager that gets the list of files and puts them into a
+                     data_table, that is then used to get the filenames. That would require the admin
+                     to first run the data_manager that builds the filename data_table before running
+                     this data_manager.
+                This is the dynamic way to get the options filled.
+                <param name="filename" type="select" label="Select File" display="radio" 
+                    dynamic_options="get_ctat_genome_filenames()" 
+                    help="Select a CTAT Genome Resource Library to Download." />
+                Here is the static method for what is online in April 2017:
+                <param name="filename" type="select" label="Choose which library to download.">
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz">
+                        GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
+                    </option>
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/                        GRCh37_v19_CTAT_lib_Feb092018.source_data.tar.gz">
+                        GRCh37_v19_CTAT_lib_Feb092018.source_data.tar.gz
+                    </option>                        
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/                        GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz">
+                        GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz
+                    </option>
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/                        GRCh38_v27_CTAT_lib_Feb092018.source_data.tar.gz">
+                        GRCh38_v27_CTAT_lib_Feb092018.source_data.tar.gz
+                    </option>
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/                        Mouse_M16_CTAT_lib_Feb202018.plug-n-play.tar.gz">
+                        Mouse_M16_CTAT_lib_Feb202018.plug-n-play.tar.gz
+                    </option>
+                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/                        Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz">
+                        Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
+                    </option>
+                -->
+                <param name="source_url" type="select" label="Select a File"
+                    dynamic_options="get_ctat_genome_urls()" 
+                    help="Select a CTAT Genome Resource Library to Download.">
+                </param>
+                <param name="force_download" type="boolean" checked="false" label="Force New Download?" />
+            </when>
+        </conditional>
+
+        <param name="display_name" type="text" label="Reference Genome Display Name" />
+        <param name="destination" type="text" label="Local Destination (full path)" />
+        <param name="rebuild" type="boolean" checked="false" label="Force rebuild of Library?" />
+        <param name="gmap_build" type="boolean" checked="false" label="Do a gmap_build on the Library?" />
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <help>
+        Retrieve, and/or specify the location of, a CTAT Genome Resource Library.
+        When download is true, the files at https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/
+        are used as selectors for the user to choose among.
+        Specify the Full Path of the location where the CTAT Resource Library should be placed.
+        You will need approximately 60GB of space for this library.
+        If you already have the library, specify the full path of the location where it exists and leave the download box unchecked.
+        The Reference Genome name may be left empty if downloading. The name will be used as the selector text of the entry in the data table.
+        For more information on CTAT Genome Resource Libraries, see <a href="https://github.com/FusionFilter/FusionFilter/wiki">FusionFilter</a>
+    </help>
+    <code file="add_ctat_resource_lib.py" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Tue May 01 12:36:56 2018 -0400
@@ -0,0 +1,38 @@
+<?xml version="1.0"?>
+<data_managers>
+    <data_manager tool_file="data_manager/add_ctat_resource_lib.xml" id="ctat_genome_resource_lib_data_manager"> 
+        <data_table name="ctat_genome_resource_libs">
+            <output>
+                <column name="value" />
+                    <!-- value is used to uniquely identify this entry in the table.
+                    -->
+                <column name="name" />
+                    <!-- name is used as the selector in the pull down lists for items in this table.
+                    -->
+                <column name="path" />
+                    <!-- path is the absolute path of the top level directory of the CTAT Genome Resource Library.
+                    -->
+                <!-- <column name="path" output_ref="out_file"> -->
+                    <!-- It is typical to move the data file, but because our tool gets the destination
+                    location from the user, we do not want to move the data from that location.
+                    The full path of the CTAT Resource library is returned in location. 
+                    So no need to change the value either.
+                    The files are so big we do not want to be making copies of them.
+                    They are created where we want them.
+                    -->
+                    <!-- <move type="file" relativize_symlinks="False"> -->
+                        <!--<source>${path}</source> -->
+                        <!--<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">ctat_genome_lib_build_dir</target> -->
+                    <!--</move> -->
+                    <!--
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/ctat_genome_lib_build_dir
+                    </value_translation>
+                    -->
+                    <!-- The location returned by the tool should already be an absolute path.
+                    <value_translation type="function">abspath</value_translation>
+                    -->
+                <!--</column> -->
+            </output>
+        </data_table>
+    </data_manager>
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_genome_resource_libs.loc.sample	Tue May 01 12:36:56 2018 -0400
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Genome Resource Libraries
+# Usually there will only be one library, but it is conceivable
+# that there could be multiple libraries.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the genome resource lib files are stored
+#
+#ctat_genome_resource_libs.loc could look like:
+#
+#GRCh38_v27_CTAT_lib_Feb092018	CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018	/path/to/ctat/genome/resource/lib/directory
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue May 01 12:36:56 2018 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False">
+        <columns>value, name, path</columns>
+        <file path="tool-data/ctat_genome_resource_libs.loc" />
+    </table>
+</tables>