# HG changeset patch
# User trinity_ctat
# Date 1525119457 14400
# Node ID 367b0d693b0c32a3a995827be2ce834cf8ad277f
# Parent  3bb91cebec5c59f1ed5648ff681bb984a155e777
Uploaded

diff -r 3bb91cebec5c -r 367b0d693b0c data_manager/add_ctat_centrifuge_index.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/add_ctat_centrifuge_index.py	Mon Apr 30 16:17:37 2018 -0400
@@ -0,0 +1,353 @@
+#!/usr/bin/env python
+# ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
+
+# Rewritten by H.E. Cicada Brokaw Dennis from a source downloaded from the toolshed and
+# other example code on the web.
+# This allows downloading of a centrifuge index, or specification of its disk location.
+# This index is one of the input parameters needed by the ctat_metagenomics tool.
+# At the moment only one index is supported by the ctat_metagenomics tool:
+# ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz
+
+import argparse
+import os
+#import tarfile
+#import urllib
+import subprocess
+
+# The following is used to generate a unique_id value
+from datetime import *
+
+# Remove the following line when testing without galaxy package:
+from galaxy.util.json import to_json_string
+# Am not using the following:
+# from galaxy.util.json import from_json_string
+
+# The FileListParser is used by get_ctat_centrifuge_index_locations(),
+# which is called by the Data Manager interface (.xml file) to get
+# the filenames that are available online at ccb.jhu.edu
+# Not sure best way to do it. 
+# This object uses HTMLParser to look through the html 
+# searching for the filenames within anchor tags.
+import urllib2
+from HTMLParser import HTMLParser
+
+_CTAT_CentrifugeIndexPage_URL = 'https://ccb.jhu.edu/software/centrifuge/'
+_CTAT_CentrifugeDownload_URL = 'ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz'
+_CTAT_CentrifugeIndexTableName = 'ctat_centrifuge_indexes'
+_CTAT_CentrifugeDir_Name = 'p_compressed+h+v'
+_CTAT_Centrifuge_DisplayNamePrefix = 'CTAT_CentrifugeIndex_'
+_CentrifugeIndexFileExtension = 'cf'
+_NumBytesNeededForIndex = 7400130287 # 6.9 GB
+#_DownloadFileSize = 5790678746 # 5.4 Gigabytes.
+_Download_TestFile = 'write_testfile.txt'
+_DownloadSuccessFile = 'download_succeeded.txt'
+
+class FileListParser(HTMLParser):
+    # Collects into self.filenames every anchor href that contains "tar.gz" but not "md5".
+    def __init__(self):
+        # HTMLParser is an "old style" class (object is not in its inheritance chain), so
+        # super(FileListParser, self).__init__() cannot be used; call the base initializer directly.
+        HTMLParser.__init__(self)
+        self.filenames = set()
+    def handle_starttag(self, tag, attrs):
+        # Look for filename references in anchor tags and add them to filenames.
+        if tag == "a":
+            # The tag is an anchor tag.
+            for attribute in attrs:
+                # attribute[0] is compared with the attribute name; attribute[1] is used as its value.
+                if attribute[0] == "href":
+                    # Keep hrefs that mention tar.gz, skipping any that mention md5 (presumably checksum files).
+                    if ("tar.gz" in attribute[1]) and ("md5" not in attribute[1]):
+                        # filenames is a set, so a repeated href is stored only once.
+                        self.filenames.add(attribute[1])            
+# End of class FileListParser
+
+def get_ctat_centrifuge_index_locations():
+    # For dynamic options need to return an interable with contents that are tuples with 3 items.
+    # Item one is a string that is the display name put into the option list.
+    # Item two is the value that is put into the parameter associated with the option list.
+    # Item three is a True or False value, indicating whether the item is selected.
+    options = []
+    # open the url and retrieve the filenames of the files in the directory.
+    resource = urllib2.urlopen(_CTAT_CentrifugeIndexPage_URL)
+    theHTML = resource.read()
+    filelist_parser = FileListParser()
+    filelist_parser.feed(theHTML)
+    # This is what was returned on 2018-04-23
+    # ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed_2018_4_15.tar.gz
+    # ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/nt_2018_3_3.tar.gz
+    # ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz
+    # ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p+h+v.tar.gz
+    # Which could be hard coded:
+    # vals.append(("p_compressed+h+v", "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz", True))
+    # vals.append(("p+h+v", "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p+h+v.tar.gz", False))
+    # vals.append(("nt_2018_3_3", "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/nt_2018_3_3.tar.gz", False))
+    # vals.append(("p_compressed_2018_4_15", "ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed_2018_4_15.tar.gz", False))
+    # but only returning the one we want, which for now is assumed to be present.
+    # For now, I am printing the list, just so I can see what was returned, 
+    print "FYI: The URL's that were found on Centrifuge's page are:"
+    print "\t" + "\n\t".join(filelist_parser.filenames)
+    # For now instead of sending back the list of found URL's, send back the one URL we want.
+    # Currently, only one of the options is supported.
+    vals.append((_CTAT_CentrifugeDir_Name, _CTAT_CentrifugeDownload_URL, True))
+    print "The items in vals are:"
+    print str(vals)
+    return vals 
+
+# The following was used by the example program to get input parameters through the json.
+# Just leaving here for reference.
+# We are getting all of our parameter values through command line arguments.
+#def get_reference_id_name(params):
+#    genome_id = params['param_dict']['genome_id']
+#    genome_name = params['param_dict']['genome_name']
+#    return genome_id, genome_name
+#
+#def get_url(params):
+#    trained_url = params['param_dict']['trained_url']
+#    return trained_url
+
+def download_index(src_location, destination, force_download):
+    # We do not know if the index has been downloaded already.
+    # This function returns whether or not the index actually gets downloaded.
+    index_was_downloaded = False
+    # Get the root filename of the Genome Directory. 
+    # The part after the last '/' and before the first '.'
+    root_index_dirname = src_location.split("/")[-1].split(".")[0]
+
+    # We want to make sure that destination is absolute fully specified path.
+    cannonical_destination = os.path.realpath(destination) 
+    if cannonical_destination.split("/")[-1] != root_index_dirname:
+        cannonical_destination += "/" + root_index_dirname
+    if os.path.exists(cannonical_destination):
+        if not os.path.isdir(cannonical_destination):
+            raise ValueError("The destination is not a directory: " + \
+                             "{:s}".format(cannonical_destination))
+        # else all is good. It is a directory.
+    else:
+        # We need to create it.
+        try:
+            os.makedirs(cannonical_destination)
+        except os.error:
+            print "ERROR: Trying to create the following directory path:"
+            print "\t{:s}".format(cannonical_destination)
+            raise
+
+    # Make sure the directory now exists and we can write to it.
+    if not os.path.exists(cannonical_destination):
+        # It should have been created, but if it doesn't exist at this point
+        # in the code, something is wrong. Raise an error.
+        raise OSError("The destination directory could not be created: " + \
+                      "{:s}".format(cannonical_destination))
+    test_writing_file = "{:s}/{:s}".format(cannonical_destination, _Download_TestFile)
+    try:
+        filehandle = open(test_writing_file, "w")
+        filehandle.write("Testing writing to this file.")
+        filehandle.close()
+        os.remove(test_writing_file)
+    except IOError:
+        print "The destination directory could not be written into: " + \
+                      "{:s}".format(cannonical_destination)
+        raise
+    
+    # Get the list of files in the directory,
+    # We use it to check for a previous download or extraction among other things.
+    orig_files_in_destdir = set(os.listdir(cannonical_destination))
+    # See whether the file has been downloaded already.
+    download_success_file_path = "{:s}/{:s}".format(cannonical_destination, _DownloadSuccessFile)
+    if (_DownloadSuccessFile not in orig_files_in_destdir) or force_download:
+        # Check whether there is enough space on the device for the index.
+        statvfs = os.statvfs(cannonical_destination)
+        # fs_size = statvfs.f_frsize * statvfs.f_blocks          # Size of filesystem in bytes
+        # num_free_bytes = statvfs.f_frsize * statvfs.f_bfree    # Actual number of free bytes
+        num_avail_bytes = statvfs.f_frsize * statvfs.f_bavail    # Number of free bytes that ordinary users
+                                                                 # are allowed to use (excl. reserved space)
+        if (num_avail_bytes < _NumBytesNeededForIndex):
+            raise OSError("There is insufficient space ({:s} bytes)".format(str(num_avail_bytes)) + \
+                          " on the device of the destination directory: " + \
+                          "{:s}".format(cannonical_destination))
+    
+        #Previous code to download and untar. Not using anymore.
+        #full_filepath = os.path.join(destination, src_filename)
+        #
+        #Download ref: https://dzone.com/articles/how-download-file-python
+        #f = urllib2.urlopen(ctat_resource_lib_url)
+        #data = f.read()
+        #with open(full_filepath, 'wb') as code:
+        #    code.write(data)
+        #
+        #Another way to download:
+        #try: 
+        #    urllib.urlretrieve(url=ctat_resource_lib_url, filename=full_filepath)
+        #
+        #Then untar the file.
+        #try: 
+        #    tarfile.open(full_filepath, mode='r:*').extractall()
+        
+        if (_DownloadSuccessFile in orig_files_in_destdir):
+            # Since we are redoing the download, 
+            # the success file needs to be removed
+            # until the download has succeeded.
+            os.remove(download_success_file_path)
+        # We want to transfer and untar the file without storing the tar file, because that
+        # adds all that much more space to the needed amount of free space on the disk.
+        # Use subprocess to pipe the output of curl into tar.
+        # Make curl silent so progress is not printed to stderr.
+        command = "curl --silent {:s} | tar -xzf - -C {:s}".format(src_location, cannonical_destination)
+        try: # to send the command that downloads and extracts the file.
+            command_output = subprocess.check_output(command, shell=True)
+            # FIX - not sure check_output is what we want to use. If we want to have an error raised on
+            # any problem, maybe we should not be checking output.
+        except subprocess.CalledProcessError:
+            print "ERROR: Trying to run the following command:\n\t{:s}".format(command)
+            raise
+        else:
+            index_was_downloaded = True
+
+    # Some code to help us if errors occur.
+    print "\n*******************************\nFinished download and extraction."
+    subprocess.check_call("ls -lad {:s}/*".format(cannonical_destination), shell=True)
+    
+    files_in_destdir = set(os.listdir(cannonical_destination))
+    found_filenames = set()
+    for filename in files_in_destdir:
+        # There should be three files, but some OS's might have created
+        # other files in the directory, or maybe the user did.
+        # Look for the index files.
+        # The download files' names should start with the root_index_dirname
+        # print "Is root: {:s} in file: {:s}".format(root_index_dirname, filename)
+        if root_index_dirname in filename:
+            found_filenames.add(filename)
+    # print "The found_filenames are:\n\t{:s}".format(str(found_filenames))
+    if (len(found_filenames) >= 3):
+        # FIX - we could md5 the files to make sure they are correct.
+        # Or at least check their sizes, to see if the download completed ok.
+        # Also we could check the names of the files.
+        try:
+            # Create a file to indicate that the download succeeded.
+            subprocess.check_call("touch {:s}".format(download_success_file_path), shell=True)
+        except IOError:
+            print "The download_success file could not be created: " + \
+                      "{:s}".format(download_success_file_path)
+            raise
+    else:
+        print "After download, the potential index files found are:\n\t{:s}".format(str(found_filenames))
+        raise ValueError("ERROR: Could not find the extracted index files " + \
+                         "in the destination directory:\n\t{:s}".format(cannonical_destination))
+
+    return (cannonical_destination, root_index_dirname, index_was_downloaded)
+        
+def main():
+    #Parse Command Line
+    # print "At start before parsing arguments."
+    parser = argparse.ArgumentParser()
+    parser.add_argument('-d', '--download_location', default="", \
+        help='This is the download location of the centrifuge index.')
+    parser.add_argument('-n', '--display_name', default="", \
+        help='Is used as the selector text for the entry of this Centrifuge Index in the data table.')
+    parser.add_argument('-p', '--destination_path', \
+        help='Full path of the Centrifuge Index location or destination, either where it is, or where it will be placed.')
+    parser.add_argument('-o', '--output_filename', \
+        help='Name of the output file, where the json dictionary will be written.')
+    parser.add_argument('-f', '--force_download', 
+        help='Forces download of the Centrifuge Index, even if previously downloaded. ' + \
+             'Requires download_location to be set in order to work.', action="store_true")
+    args = parser.parse_args()
+
+    # All of the input parameters are written by default to the output file prior to
+    # this program being called.
+    # But I do not get input values from the json file, but rather from command line.
+    # Just leaving the following code as a comment, in case it might be useful to someone later.
+    # params = from_json_string(open(filename).read())
+    # target_directory = params['output_data'][0]['extra_files_path']
+    # os.mkdir(target_directory)
+
+    # print "Arguments are parsed."
+    print "\ndownload_location is {:s}".format(str(args.download_location))
+    print "display_name is {:s}".format(str(args.display_name))
+    print "destination_path is {:s}\n".format(str(args.destination_path))
+    root_index_dirname = None
+    # FIX - Prob don't need index_was_downloaded. Not doing anything with it.
+    # But it indicates success downloading the index, so maybe should be checking it.
+    index_was_downloaded = False
+    if (args.download_location != ""):
+        index_directory, root_index_dirname, index_was_downloaded = \
+            download_index(src_location=args.download_location, \
+                           destination=args.destination_path, \
+                           force_download=args.force_download)
+    else:
+        cannonical_destination = os.path.realpath(args.destination_path)
+        if not os.path.exists(cannonical_destination):
+            raise ValueError("Cannot find the Centrifuge Index.\n" + \
+                "The directory does not exist:\n\t{:s}".format(index_directory))
+        # If args.destination_path is a directory containing 
+        # a subdirectory that contains the index files,
+        # then we need to set the index_directory to be that subdirectory.
+        files_in_destination_path = os.listdir(cannonical_destination)
+        if (len(files_in_destination_path) == 1):
+            path_to_file = "{:s}/{:s}".format(cannonical_destination, files_in_destination_path[0])
+            if os.path.isdir(path_to_file):
+                index_directory = path_to_file
+            else:
+                index_directory = cannonical_destination
+        else:
+            index_directory = cannonical_destination
+        # Get the root_index_dirname of the index from the index_directory name.
+        root_index_dirname = index_directory.split("/")[-1].split(".")[0]
+
+    # Check if there is an actual Centrifuge Index file in the index_directory.
+    print "\nThe location of the Centrifuge Index is {:s}.\n".format(index_directory)
+    files_in_index_directory = set(os.listdir(index_directory))
+    index_file_found = False
+    index_file_path = index_directory
+    for filename in files_in_index_directory:
+        # The current index is split into 3 files.
+        # filenames are in the form: index_root_name.#.cf,
+        # where # is a numeral (1, 2, or 3)
+        # indicating the order of the files.
+        if filename.split(".")[-1] == _CentrifugeIndexFileExtension:
+            index_file_found = True
+            # The centrifuge program wants the root name of the files to be final part of the path.
+            index_file_path = "{:s}/{:s}".format(index_directory, filename.split(".")[0])
+    if not index_file_found:
+        raise ValueError("Cannot find any Centrifuge Index files.\n" + \
+            "The contents of the directory {:s} are:\n\t".format(index_directory) + \
+            "\n\t".join(files_in_index_directory))
+
+    # Set the display_name
+    if (args.display_name is None) or (args.display_name == ""):
+        # Use the root_index_dirname.
+        if (root_index_dirname != None) and (root_index_dirname != ""):
+            display_name = _CTAT_Centrifuge_DisplayNamePrefix + root_index_dirname
+        else:
+            display_name = _CTAT_Centrifuge_DisplayNamePrefix + _CTAT_CentrifugeDir_Name
+            print "WARNING: Did not set the display name. Using the default: {:s}".format(display_name_value)
+    else:
+        display_name = _CTAT_Centrifuge_DisplayNamePrefix + args.display_name
+    display_name = display_name.replace(" ","_")
+
+    # Set the unique_id
+    datetime_stamp = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S_%f")
+    if (root_index_dirname != None) and (root_index_dirname != ""):
+        unique_id = root_index_dirname + datetime_stamp
+    else:
+        unique_id = _CTAT_CentrifugeDir_Name + datetime_stamp
+
+    print "The Index's display_name will be set to: {:s}\n".format(display_name)
+    print "Its unique_id will be set to: {:s}\n".format(unique_id)
+    print "Its dir_path will be set to: {:s}\n".format(index_file_path)
+
+    data_manager_dict = {}
+    data_manager_dict['data_tables'] = {}
+    data_manager_dict['data_tables'][_CTAT_CentrifugeIndexTableName] = []
+    data_table_entry = dict(value=unique_id, name=display_name, path=index_file_path)
+    data_manager_dict['data_tables'][_CTAT_CentrifugeIndexTableName].append(data_table_entry)
+
+    # Temporarily the output file's dictionary is written for debugging:
+    print "The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict))
+    # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager,
+    # which then puts it into the correct .loc file (I think).
+    # Remove the following line when testing without galaxy package.
+    open(args.output_filename, 'wb').write(to_json_string(data_manager_dict))
+
+if __name__ == "__main__":
+    main()
diff -r 3bb91cebec5c -r 367b0d693b0c data_manager/add_ctat_centrifuge_index.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/add_ctat_centrifuge_index.xml	Mon Apr 30 16:17:37 2018 -0400
@@ -0,0 +1,78 @@
+<tool id="ctat_centrifuge_indexes_data_manager" 
+    name="CTAT Centrifuge Indexes Data Manager" 
+    version="1.0.0" tool_type="manage_data">
+    <description>Retrieve, and/or specify the location of, a CTAT Centrifuge Index. 
+    </description>
+    <requirements>
+        <requirement type="package" version="2.7">python</requirement>
+    </requirements>
+    <command detect_errors="default">
+        <![CDATA[
+        python $__tool_directory__/add_ctat_centrifuge_index.py 
+            --display_name "${display_name}" 
+            --destination_path "${destination}" 
+            --output_filename "${out_file}" 
+            #if str( $download_question.download ) == "true":
+                --download_location "\"${download_question.filename}\"" 
+                #if str( $download_question.force_download ) == "true":
+                    --force_download 
+                #end if
+            #end if
+        ]]>
+    </command>
+    <inputs>
+        <!-- The following are left in here, just as examples of various ways of doing options.
+            <param name="force_download" type="boolean" checked="false"
+                truevalue="- -force_download" falsevalue="" label="Force New Download? (yes/no)" />
+            <param name="download" type="select" label="Need to Download?">
+                <option value="single" selected="true">Single Dataset</option>
+                <option value="paired_collection">Paired Collection</option>
+            <when value="paired_collection">
+                 <param name="fastq_input" format="fastqsanger" type="data_collection" collection_type="paired" label="Select dataset pair" help="Specify paired dataset collection containing paired reads"/>
+            </when>
+        -->
+        <conditional name="download_question">
+            <param name="download" type="boolean" checked="false" label="Need to Download?" />
+            <when value="true">
+                <!-- The use of a code block to get dynamic options is now deprecated and discouraged.
+                     I am still using it here. The only other way I can think of to do this is to
+                     create another data_manager that gets the list of files and puts them into a
+                     data_table, that is then used to get the filenames. That would require the admin
+                     to first run the data_manager that builds the filename data_table before running
+                     this data_manager.
+                     However, I have not been able to figure out how to send information back correctly
+                     from the function and there is no documentation that I have found showing how to do it.
+                <param name="filename" type="select" label="Select File" display="radio" 
+                    dynamic_options="get_ctat_centrifuge_index_locations()" 
+                    help="Select a Centrifuge Index to Download." />
+                Hard coded version.
+                <param name="filename" type="text" value="ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz">
+                    <option value="ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz">
+                        p_compressed+h+v
+                    </option>
+                </param>
+                -->
+                <param name="filename" type="select" label="Select File"
+                    dynamic_options="get_ctat_centrifuge_index_locations()" 
+                    help="Select a Centrifuge Index to Download." />
+                <param name="force_download" type="boolean" checked="false" label="Force New Download?" />
+            </when>
+        </conditional>
+
+        <param name="display_name" type="text" label="Centrifuge Index Display Name" />
+        <param name="destination" type="text" label="Local Destination (full path)" />
+    </inputs>
+    <outputs>
+        <data name="out_file" format="data_manager_json" />
+    </outputs>
+    <help>
+        Retrieve, and/or specify the location of, a CTAT Centrifuge Index.
+        When download is true, the index is downloaded from the following URL, which is currently the only supported index:
+        ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz
+        Specify the full path of the destination where the Centrifuge Index should be placed.
+        If you already have the index, specify the full path of the location where it exists and leave the download box unchecked.
+        The display_name may be left empty if downloading. 
+        The display_name will be used as the selector text of the entry in the data table.
+    </help>
+    <code file="add_ctat_centrifuge_index.py" />
+</tool>
diff -r 3bb91cebec5c -r 367b0d693b0c data_manager/add_ctat_ref_lib.py
--- a/data_manager/add_ctat_ref_lib.py	Fri Dec 15 15:53:28 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-# ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
-
-# Rewritten by H.E. Cicada Brokaw Dennis from source downloaded from the toolshed.
-# Eventually this should be modified to allow downloading of more than just the one library,
-# to let the user select what library/location to download, but that would require the
-# download tool to generate the list of libraries to download on the fly. Currently
-# we are only using the one library.
-# Users can create other ones locally and use this tool to add them if they don't want
-# to add them by hand.
-
-import argparse
-import os
-import tarfile
-import urllib
-
-from galaxy.util.json import from_json_string, to_json_string
-
-# The following was used by prior program to get input parameters from the json.
-# Just leaving here for reference.
-#def get_reference_id_name(params):
-#    genome_id = params['param_dict']['genome_id']
-#    genome_name = params['param_dict']['genome_name']
-#    return genome_id, genome_name
-#
-#def get_url(params):
-#    trained_url = params['param_dict']['trained_url']
-#    return trained_url
-
-def download_from_BroadInst(destination):
-    ctat_resource_lib = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_Nov012017.plug-n-play.tar.gz'
-    # FIX - Check that the download directory is empty if it exists. Also, can we check if there is enough space on the device as well?
-    # FIX - Also we want to make sure that destination is absolute fully specified path.
-    os.mkdir(destination)
-    full_filepath = os.path.join(destination, 'GRCh38_gencode_v26_CTAT_lib_Nov012017.plug-n-play.tar.gz')
-
-    #Download ref: https://dzone.com/articles/how-download-file-python
-    #f = urllib2.urlopen(ctat_resource_lib)
-    #data = f.read()
-    #with open(filepath, 'wb') as code:
-    #    code.write(data)
-
-    urllib.urlretrieve(url=ctat_resource_lib, filename=full_filepath)
-    # Put the following into a try statement, so that if there is a failure something can be printed about it before reraising exception.
-    tarfile.open(full_filepath, mode='r:*').extractall()
-    # FIX - There is additional processing that needs to happen for gmap-fusion to work.
-    # Get the root filename of the extracted file. 
-    # That directory is the actual destination that needs to be set as the ctat_genome_resource_library
-
-def main():
-    #Parse Command Line
-    parser = argparse.ArgumentParser()
-    parser.add_argument('-d', '--download', action="store_true", \
-        help='Do not use if you already have the CTAT Resource Library that this program downloads.')
-    parser.add_argument('-g', '--genome_name', default="GRCh38_gencode_v26", \
-        help='Is used as the selector text of the entry in the data table.')
-    parser.add_argument('-p', '--destination_path', \
-        help='Full path of the CTAT Resource Library location or destination.')
-    parser.add_argument('-o', '--output_filename', \
-        help='Name of the output file, where the json dictionary will be written.')
-    args = parser.parse_args()
-
-    # All of the input parameters are written by default to the output file prior to
-    # this program being called.
-    # But I do not get input values from the json file, but rather from command line.
-    # Just leaving the following code as a comment, in case it might be useful to someone later.
-    # params = from_json_string(open(filename).read())
-    # target_directory = params['output_data'][0]['extra_files_path']
-    # os.mkdir(target_directory)
-
-    if args.download:
-        ctat_genome_resource_lib_path = download_from_BroadInst(destination=args.destination_path)
-    else:
-        # FIX - probably should check if this is a valid path with an actual CTAT Genome Ref Lib there.
-        ctat_genome_resource_lib_path = args.destination_path
-
-    if (args.genome_name is None) or (args.genome_name == ""):
-        genome_name = "GRCh38_gencode_v26"
-    else:
-        genome_name = args.genome_name
-    # Set the value to the basename of the directory path minus the extension. 
-    # FIX - Need to make sure is unique. This is not good way to do it. Just doing it this way now for testing.
-    table_entry_value = os.path.basename(ctat_genome_resource_lib_path).split(".")[0]
-    data_manager_dict = {}
-    data_manager_dict['data_tables'] = {}
-    data_manager_dict['data_tables']['ctat_genome_ref_libs'] = []
-    data_table_entry = dict(value=table_entry_value, name=genome_name, path=ctat_genome_resource_lib_path)
-    data_manager_dict['data_tables']['ctat_genome_ref_libs'].append(data_table_entry)
-
-    # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager,
-    # which then puts it into the correct .loc file (I think).
-    open(args.output_filename, 'wb').write(to_json_string(data_manager_dict))
-
-if __name__ == "__main__":
-    main()
-
diff -r 3bb91cebec5c -r 367b0d693b0c data_manager/add_ctat_ref_lib.xml
--- a/data_manager/add_ctat_ref_lib.xml	Fri Dec 15 15:53:28 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-<tool id="ctat_genome_ref_lib_data_manager" 
-    name="CTAT Genome Reference Library Data Manager" 
-    version="1.0.0" tool_type="manage_data">
-    <description>Retrieve, and/or Specify the location of, a CTAT Genome Reference Library. 
-    </description>
-    <requirements>
-        <requirement type="package" version="2.7">python</requirement>
-    </requirements>
-    <command detect_errors="default">
-        <![CDATA[
-        python $__tool_directory__/add_ctat_ref_lib.py ${download}
-            --genome_name "${genome_name}"
-            --destination_path "${destination}" 
-            --output_filename "${out_file}"
-        ]]>
-    </command>
-    <inputs>
-        <param name="download" type="boolean" checked="false"
-             truevalue="--download" falsevalue="" label="Need to Download? (yes/no)" />
-        <param name="genome_name" type="text" label="Reference Genome name" />
-        <param name="destination" type="text" label="Local Destination (full path)" />
-    </inputs>
-    <outputs>
-        <data name="out_file" format="data_manager_json" />
-    </outputs>
-    <help>
-        Retrieve, and/or specify the location of, a CTAT Genome Reference Library.
-        When download is true, the file retrieved and processed is https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_Nov012017.plug-n-play.tar.gz.
-        Specify the Full Path of the location where the CTAT Reference Library should be placed.
-        You will need approximately 30GB of space for this library.
-        If you already have the library, specify the full path of the location where it exists and leave the download box unchecked.
-        The Reference Genome name may be left empty if downloading. The name will be used as the selector text of the entry in the data table.
-        For more information on CTAT Genome Reference Libraries, see <a http="https://github.com/FusionFilter/FusionFilter/wiki">FusionFilter</a>
-    </help>
-</tool>
\ No newline at end of file
diff -r 3bb91cebec5c -r 367b0d693b0c data_manager_conf.xml
--- a/data_manager_conf.xml	Fri Dec 15 15:53:28 2017 -0500
+++ b/data_manager_conf.xml	Mon Apr 30 16:17:37 2018 -0400
@@ -1,39 +1,20 @@
 <?xml version="1.0"?>
 <data_managers>
-    <data_manager tool_file="data_manager/add_ctat_ref_lib.xml" id="ctat_genome_ref_lib_data_manager"> 
-        <data_table name="ctat_genome_ref_libs">
+    <data_manager tool_file="data_manager/add_ctat_centrifuge_index.xml" id="ctat_centrifuge_index_data_manager"> 
+        <data_table name="ctat_centrifuge_indexes">
             <output>
                 <column name="value" />
                     <!-- value is used to uniquely identify this entry in the table.
-                    For now id is also the name of the environment variable that is used within tools to
-                    access a CTAT Resource Library. 
-                    FIX - Need to get rid of that and use command line params...
                     -->
                 <column name="name" />
                     <!-- name is used as the selector in the pull down lists for items in this table.
                     -->
                 <column name="path" />
-                    <!-- path is the absolute path of the corresponding CTAT Genome Reference Library.
-                    -->
-                <!-- <column name="path" output_ref="out_file"> -->
-                    <!-- It is typical to move the data file, but because our tool gets the destination
-                    location from the user, we do not want to move the data from that location.
-                    The full path of the CTAT Resource library is returned in location. 
-                    So no need to change the value either.
+                    <!-- path is the absolute path of the directory containing the centrifuge index files.
                     -->
-                    <!-- <move type="file" relativize_symlinks="False"> -->
-                        <!--<source>${path}</source> -->
-                        <!--<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">ctat_genome_lib_build_dir</target> -->
-                    <!--</move> -->
-                    <!--
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/ctat_genome_lib_build_dir
-                    </value_translation>
-                    -->
-                    <!-- The location returned by the tool should already be an absolute path.
-                    <value_translation type="function">abspath</value_translation>
-                    -->
-                <!--</column> -->
+                <!-- As with the Genome Reference Library, the tool creates the Centrifuge Index files directly in their final location, so no move step is configured.
+                -->
             </output>
         </data_table>
     </data_manager>
-</data_managers>
\ No newline at end of file
+</data_managers>
diff -r 3bb91cebec5c -r 367b0d693b0c tool-data/ctat_centrifuge_indexes.loc.sample
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/ctat_centrifuge_indexes.loc.sample	Mon Apr 30 16:17:37 2018 -0400
@@ -0,0 +1,15 @@
+# This file lists the locations of CTAT Centrifuge Indexes
+# Usually there will only be one index, but it is conceivable
+# that there could be multiple indexes.
+# This file format is as follows
+# (white space characters are TAB characters):
+#
+#<value>    <name>  <path>
+# value is a unique id
+# name is the display name
+# path is the directory where the index files are stored
+#
+#ctat_centrifuge_indexes.loc could look like:
+#
+#p_compressed+h+v CTAT_CentrifugeIndex_p_compressed+h+v  /path/to/centrifuge/index/p_compressed+h+v
+#
diff -r 3bb91cebec5c -r 367b0d693b0c tool-data/ctat_genome_ref_libs.loc.sample
--- a/tool-data/ctat_genome_ref_libs.loc.sample	Fri Dec 15 15:53:28 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-# This file lists the locations of CTAT Genome Reference Libraries
-# Usually there will only be one library, but it is concievable 
-# that there could be multiple libraries.
-# This file format is as follows
-# (white space characters are TAB characters):
-#
-#<unique_id>    <display_name>  <file_path>
-#
-#ctat_genome_ref_libs.loc could look like:
-#
-#CTAT_RESOURCE_LIB  GRCh38_gencode_v26   /ctat/genome/resource/lib/path
-#
diff -r 3bb91cebec5c -r 367b0d693b0c tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample	Fri Dec 15 15:53:28 2017 -0500
+++ b/tool_data_table_conf.xml.sample	Mon Apr 30 16:17:37 2018 -0400
@@ -1,6 +1,6 @@
 <tables>
-    <table name="ctat_genome_ref_libs" comment_char="#" allow_duplicate_entries="False">
+    <table name="ctat_centrifuge_indexes" comment_char="#" allow_duplicate_entries="False">
         <columns>value, name, path</columns>
-        <file path="tool-data/ctat_genome_ref_libs.loc" />
+        <file path="tool-data/ctat_centrifuge_indexes.loc" />
     </table>
 </tables>