Mercurial > repos > trinity_ctat > ctat_genome_resource_libs_data_manager_3
changeset 0:d2c51cdc2172 draft
Uploaded
author | trinity_ctat |
---|---|
date | Tue, 01 May 2018 12:36:56 -0400 |
parents | |
children | fbe2227fe0d6 |
files | data_manager/add_ctat_resource_lib.py data_manager/add_ctat_resource_lib.xml data_manager_conf.xml tool-data/ctat_genome_resource_libs.loc.sample tool_data_table_conf.xml.sample |
diffstat | 5 files changed, 649 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/add_ctat_resource_lib.py Tue May 01 12:36:56 2018 -0400 @@ -0,0 +1,481 @@ +#!/usr/bin/env python +# ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/ + +# Rewritten by H.E. Cicada Brokaw Dennis from a source downloaded from the toolshed and +# other example code on the web. +# This now allows downloading of a user selected library +# but only from the CTAT Genome Resource Library website. +# Ultimately we might want to allow the user to specify any location +# from which to download. +# Users can create or download other libraries and use this tool to add them if they don't want +# to add them by hand. + +import argparse +import os +#import tarfile +#import urllib +import subprocess + +# Comment out the following line when testing without galaxy package. +from galaxy.util.json import to_json_string +# The following is not being used, but leaving as info +# in case we ever want to get input values using json. +# from galaxy.util.json import from_json_string + +# datetime.now() is used to create the unique_id +from datetime import datetime + +# The FileListParser is used by get_ctat_genome_filenames(), +# which is called by the Data Manager interface (.xml file) to get +# the filenames that are available online at broadinstitute.org +# Not sure best way to do it. +# This object uses HTMLParser to look through the html +# searching for the filenames within anchor tags. +import urllib2 +from HTMLParser import HTMLParser + +_CTAT_ResourceLib_URL = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/' +_CTAT_BuildDir_Name = 'ctat_genome_lib_build_dir' +_CTAT_ResourceLib_DisplayNamePrefix = 'CTAT_GenomeResourceLib_' +_CTAT_ResourceLib_DefaultGenome = 'Unspecified_Genome' +_NumBytesNeededForBuild = 64424509440 # 60 Gigabytes. FIX - This might not be correct. 
_Download_TestFile = "write_testfile.txt"
_DownloadSuccessFile = 'download_succeeded.txt'


class FileListParser(HTMLParser):
    """Collect the hrefs of anchor tags that reference tar.gz archives.

    After feed() has been called, self.urls holds every href value that
    contains "tar.gz" and does not contain "md5".
    """

    def __init__(self):
        # Have to use a direct call to the super class rather than super(),
        # because in Python 2 HTMLParser is an "old style" class and its
        # inheritance chain does not include object.
        HTMLParser.__init__(self)
        self.urls = set()

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag when it names a tar.gz archive."""
        if tag != "a":
            return
        for name, value in attrs:
            # value is None for a bare attribute such as <a href>, so it has
            # to be checked before the substring tests.
            if name == "href" and value \
                    and ("tar.gz" in value) and ("md5" not in value):
                self.urls.add(value)
# End of class FileListParser


def get_ctat_genome_urls():
    """Return the select-list options for the downloadable libraries.

    Called by the Data Manager interface (.xml file) as dynamic_options.
    Returns a list of (display_name, value, selected) tuples, one for each
    non-Mouse tar.gz file referenced at _CTAT_ResourceLib_URL.
    """
    # Open the url and retrieve the urls of the files in the directory.
    resource = urllib2.urlopen(_CTAT_ResourceLib_URL)
    try:
        theHTML = resource.read()
    finally:
        resource.close()
    filelist_parser = FileListParser()
    filelist_parser.feed(theHTML)
    # For dynamic options we need to return an iterable whose items are
    # tuples with 3 items:
    #   Item one is the display name put into the option list.
    #   Item two is the value put into the parameter of the option list.
    #   Item three is True or False, indicating whether it is selected.
    options = []
    # Sorted so that the option list has a stable order between calls.
    for url in sorted(filelist_parser.urls):
        # The urls are expected to look like:
        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
        filename = url.split("/")[-1]
        if filename.split("_")[0] != "Mouse":
            # Take out the mouse genome options for now.
            # The mouse genome option is not handled correctly yet.
            # The first option that is actually offered is the selected one.
            options.append((filename, url, not options))
    return options


# NOTE: All parameter values are received through command line arguments.
# The example this was derived from read them from the json params instead,
# e.g. params['param_dict']['genome_id'].


def _print_directory_listing(directory, depth=0):
    """Print `ls -la` of directory and, down to depth levels, of its entries.

    This is a debugging aid only: a failure to list something is reported
    but never raised, so it cannot hide or cause a real error.
    """
    try:
        subprocess.call(["ls", "-la", directory])
        if depth > 0 and os.path.isdir(directory):
            entries = sorted(os.listdir(directory))
        else:
            entries = []
    except OSError as err:
        print("WARNING: Could not list {:s}: {:s}".format(directory, str(err)))
        return
    for entry in entries:
        entry_path = "{:s}/{:s}".format(directory, entry)
        print("\nDirectory {:s}:".format(entry_path))
        _print_directory_listing(entry_path, depth - 1)


def _report_directory(label, directory, depth):
    """Print the listing of the labeled directory, or that it is missing."""
    if os.path.exists(directory):
        print("\n{:s} Directory {:s}:".format(label, directory))
        _print_directory_listing(directory, depth)
    else:
        print("Genome {:s} Directory does not exist:\n\t{:s}".format(label, directory))


def _download_and_extract(source, destination):
    """Stream the archive at source through curl into tar, into destination.

    The tar file itself is never stored, because that would add that much
    more to the free space needed on the disk.
    Raises subprocess.CalledProcessError if either curl or tar fails, and
    OSError if one of the programs cannot be started.
    """
    # --fail makes curl exit non-zero on an HTTP error instead of piping
    # the error page into tar.
    curl_command = ["curl", "--fail", source]
    tar_command = ["tar", "-xzf", "-", "-C", destination]
    command_text = "{:s} | {:s}".format(" ".join(curl_command), " ".join(tar_command))
    try:
        curl_process = subprocess.Popen(curl_command, stdout=subprocess.PIPE)
        try:
            tar_process = subprocess.Popen(tar_command, stdin=curl_process.stdout)
        except OSError:
            curl_process.kill()
            curl_process.wait()
            raise
        finally:
            # Close our copy of the pipe, so curl is told if tar exits early.
            curl_process.stdout.close()
        tar_status = tar_process.wait()
        curl_status = curl_process.wait()
    except OSError:
        print("ERROR: Trying to run the following command:\n\t{:s}".format(command_text))
        raise
    failed_status = curl_status or tar_status
    if failed_status:
        print("ERROR: Trying to run the following command:\n\t{:s}".format(command_text))
        raise subprocess.CalledProcessError(failed_status, command_text)


def download_from_BroadInst(source, destination, force_download):
    """Download and extract a CTAT Genome Resource Library archive.

    Input Parameters
        source is the full URL of the file we want to download.
            It should look something like:
            https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
        destination is the location where the source file will be unarchived.
            Relative paths are expanded using the current working directory,
            so within Galaxy it is best to send in an absolute path.
        force_download will cause a new download and extraction to occur,
            even if the destination has a file in it indicating that a
            previous download succeeded.

    Returns the tuple
        (downloaded_directory, download_has_source_data,
         genome_build_directory, lib_was_downloaded)
        downloaded_directory
            The subdirectory of destination that holds the extracted data.
        download_has_source_data
            A boolean indicating whether the source file was "source_data"
            (True) or something else, such as "plug-n-play" (False).
        genome_build_directory
            The directory where the genome resource library is or where it
            should be built. It is a subdirectory of downloaded_directory.
        lib_was_downloaded
            Whether a download actually occurred during this call.

    Raises ValueError if destination is not a directory or the extracted
    directory cannot be found, OSError if destination cannot be created or
    has too little free space, IOError/OSError if it cannot be written to,
    and subprocess.CalledProcessError if the download or extraction fails.
    """
    lib_was_downloaded = False

    # The tool's command line wraps the URL in literal quote characters.
    # No shell is used here, so they are not part of the URL.
    source = source.strip().strip("\"'")

    # Get the root filename of the Genome Directory.
    src_filename = source.split("/")[-1]
    filename_parts = src_filename.split(".")
    root_genome_dirname = filename_parts[0]
    # If the src_filename indicates it is a source file, as opposed to
    # plug-n-play, then we may need to do some post processing on it.
    download_has_source_data = \
        (len(filename_parts) > 1) and (filename_parts[1] == "source_data")

    # We want to make sure that destination is absolute fully specified path.
    cannonical_destination = os.path.realpath(destination)
    if os.path.exists(cannonical_destination):
        if not os.path.isdir(cannonical_destination):
            raise ValueError("The destination is not a directory: " +
                             "{:s}".format(cannonical_destination))
        # else all is good. It is a directory.
    else:
        # We need to create it.
        try:
            os.makedirs(cannonical_destination)
        except os.error:
            print("ERROR: Trying to create the following directory path:")
            print("\t{:s}".format(cannonical_destination))
            raise

    # Make sure the directory now exists and we can write to it.
    if not os.path.exists(cannonical_destination):
        # It should have been created, but if it doesn't exist at this point
        # in the code, something is wrong. Raise an error.
        raise OSError("The destination directory could not be created: " +
                      "{:s}".format(cannonical_destination))
    test_writing_file = "{:s}/{:s}".format(cannonical_destination, _Download_TestFile)
    try:
        with open(test_writing_file, "w") as filehandle:
            filehandle.write("Testing writing to this file.")
        os.remove(test_writing_file)
    except (IOError, OSError):
        print("The destination directory could not be written into: " +
              "{:s}".format(cannonical_destination))
        raise

    # Get the list of files in the directory,
    # We use it to check for a previous download or extraction among other things.
    orig_files_in_destdir = set(os.listdir(cannonical_destination))
    # See whether the file has been downloaded already.
    download_success_file_path = "{:s}/{:s}".format(cannonical_destination, _DownloadSuccessFile)
    if ((_DownloadSuccessFile not in orig_files_in_destdir)
            or (root_genome_dirname not in orig_files_in_destdir)
            or force_download):
        # Check whether there is enough space on the device for the library.
        statvfs = os.statvfs(cannonical_destination)
        # f_bavail is the number of free blocks that ordinary users
        # are allowed to use (excl. reserved space).
        num_avail_bytes = statvfs.f_frsize * statvfs.f_bavail
        if num_avail_bytes < _NumBytesNeededForBuild:
            raise OSError("There is insufficient space ({:s} bytes)".format(str(num_avail_bytes)) +
                          " on the device of the destination directory: " +
                          "{:s}".format(cannonical_destination))

        if _DownloadSuccessFile in orig_files_in_destdir:
            # Since we are redoing the download,
            # the success file needs to be removed
            # until the download has succeeded.
            os.remove(download_success_file_path)

        _download_and_extract(source, cannonical_destination)
        lib_was_downloaded = True

        # Some code to help us if errors occur.
        print("\n*******************************\nFinished download and extraction.")
        _print_directory_listing(cannonical_destination, 1)

        files_in_destdir = set(os.listdir(cannonical_destination))
        if root_genome_dirname not in files_in_destdir:
            # Perhaps it has a different name than what we expected it to be.
            # It will be the file that was not in the directory
            # before we did the download and extraction.
            newfiles_in_destdir = files_in_destdir - orig_files_in_destdir
            found_filename = None
            if len(newfiles_in_destdir) == 1:
                # A set cannot be indexed, so take its only element this way.
                found_filename = next(iter(newfiles_in_destdir))
            else:
                for filename in newfiles_in_destdir:
                    # In most cases, there will only be one new file, but some
                    # OS's might have created other files in the directory.
                    # The correct file's name should be a substring of the
                    # name of the tar file that was downloaded.
                    if filename in src_filename:
                        found_filename = filename
            if found_filename is not None:
                root_genome_dirname = found_filename

    downloaded_directory = "{:s}/{:s}".format(cannonical_destination, root_genome_dirname)

    if os.path.exists(downloaded_directory):
        try:
            # Create (or touch) a file to indicate that the download succeeded.
            with open(download_success_file_path, "a"):
                os.utime(download_success_file_path, None)
        except (IOError, OSError):
            print("The download_success file could not be created: " +
                  "{:s}".format(download_success_file_path))
            raise
        # Look for the build directory, or specify the path where it should be placed.
        downloaded_entries = os.listdir(downloaded_directory)
        if len(downloaded_entries) == 1:
            # Then that one entry is taken to be the build directory.
            genome_build_directory = "{:s}/{:s}".format(downloaded_directory, downloaded_entries[0])
        else:
            genome_build_directory = "{:s}/{:s}".format(downloaded_directory, _CTAT_BuildDir_Name)
    else:
        raise ValueError("ERROR: Could not find the extracted file in the destination directory:" +
                         "\n\t{:s}".format(cannonical_destination))

    return (downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded)


def gmap_the_library(genome_build_directory):
    """Run gmap_build on the library in genome_build_directory.

    This is the processing that needs to happen for gmap-fusion to work.
    genome_build_directory should normally be a fully specified path,
    though it should work if it is relative.
    Raises subprocess.CalledProcessError if gmap_build fails, and OSError
    if gmap_build cannot be started.
    """
    command = ["gmap_build",
               "-D", "{:s}/".format(genome_build_directory),
               "-d", "ref_genome.fa.gmap",
               "-k", "13",
               "{:s}/ref_genome.fa".format(genome_build_directory)]
    try:  # to run the gmap_build command.
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError):
        print("ERROR: While trying to run the gmap_build command on the library:\n\t{:s}".format(
            " ".join(command)))
        raise
    finally:
        # Some code to help us if errors occur.
        print("\n*******************************\nAfter running gmap_build.")
        _report_directory("Build", genome_build_directory, 2)
        print("*******************************")


def build_the_library(genome_source_directory, genome_build_directory, build, gmap_build):
    """Build the library from source_data and/or run gmap_build on it.

    genome_source_directory is the location of the source_data needed to
        build the library. Normally it is fully specified, but could be
        relative. It may be "" or None when there is no source_data.
    genome_build_directory is the location where the library will be built.
        It can be relative to the current working directory or an absolute path.
    build specifies whether to run prep_genome_lib.pl even if it was run before.
    gmap_build specifies whether to run gmap_build or not.

    When prep_genome_lib.pl is run, the current working directory of this
    process is changed to genome_source_directory.
    Raises ValueError if a build is requested and genome_source_directory is
    given but does not exist, subprocess.CalledProcessError if the build
    command fails, and OSError if it cannot be started.

    Before FusionFilter 0.5.0 the library was built by running, in order:
        prep_genome_lib.pl (with --genome_fa, --gtf, --blast_pairs,
            --fusion_annot_lib and --output_dir),
        index_pfam_domain_info.pl (with --pfam_domains and --genome_lib_dir),
        gmap_build -D ctat_genome_lib_build_dir -d ref_genome.fa.gmap -k 13
            ctat_genome_lib_build_dir/ref_genome.fa
    """
    if build and not genome_source_directory:
        # Without source_data there is nothing prep_genome_lib.pl can build from.
        print("WARNING: A build was requested, but there is no source_data directory. " +
              "prep_genome_lib.pl will not be run.")
    if genome_source_directory and build:
        if not os.path.exists(genome_source_directory):
            raise ValueError("Cannot build the CTAT Genome Resource Library. " +
                             "The source directory does not exist:\n\t{:s}".format(genome_source_directory))
        # The input file names in the command are relative to the source directory.
        os.chdir(genome_source_directory)
        # FIX - look for a fusion_annot_lib and include it, else omit it.
        command = ["prep_genome_lib.pl",
                   "--genome_fa", "ref_genome.fa",
                   "--gtf", "ref_annot.gtf",
                   "--fusion_annot_lib", "CTAT_HumanFusionLib.v0.1.0.dat.gz",
                   "--annot_filter_rule", "AnnotFilterRule.pm",
                   "--pfam_db", "PFAM.domtblout.dat.gz",
                   "--output_dir", genome_build_directory]
        if gmap_build:
            command.append("--gmap_build")
        try:  # to run the prep_genome_lib command.
            subprocess.check_call(command)
        except (subprocess.CalledProcessError, OSError):
            print("ERROR: While trying to run the prep_genome_lib.pl command " +
                  "on the CTAT Genome Resource Library:\n\t{:s}".format(" ".join(command)))
            raise
        finally:
            # Some code to help us if errors occur.
            print("*******************************")
            _report_directory("Source", genome_source_directory, 1)
            _report_directory("Build", genome_build_directory, 2)
            print("*******************************")
    elif gmap_build:
        gmap_the_library(genome_build_directory)


def main():
    """Download/locate (and optionally build) a library and write its data table entry.

    The entry for the ctat_genome_resource_libs data table is written as
    json to the file named by --output_filename.
    """
    # Parse Command Line
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--source_url', default="",
        help='This is the url of a file with the data. They come from https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.')
    parser.add_argument('-n', '--display_name', default="",
        help='Is used as the display name for the entry of this Genome Resource Library in the data table.')
    parser.add_argument('-p', '--destination_path', required=True,
        help='Full path of the CTAT Resource Library location or destination, either where it is, or where it will be placed.')
    parser.add_argument('-o', '--output_filename', required=True,
        help='Name of the output file, where the json dictionary will be written.')
    parser.add_argument('-f', '--force_download',
        help='Forces download of the Genome Resource Library, even if previously downloaded.', action="store_true")
    parser.add_argument('-b', '--build',
        help='Forces build/rebuild the Genome Resource Library, even if previously built. ' +
             'Must have downloaded source_data for this to work.', action="store_true")
    parser.add_argument('-m', '--gmap_build',
        help='Must be selected if you want the library to be gmapped. ' +
             'Will force gmap_build of the Genome Resource Library, even if previously gmapped.', action="store_true")
    args = parser.parse_args()

    # All of the input parameters are written by default to the output file
    # prior to this program being called, but the input values are taken
    # from the command line rather than from that json file.

    # The tool's command line wraps the URL in literal quote characters.
    source_url = (args.source_url or "").strip().strip("\"'")
    # Stays empty when there is no download, so the default names are used.
    source_filename_root = ""
    download_has_source_data = False
    # If we do not download the directory, the destination_path should be the
    # location of the genome resource library.
    downloaded_directory = None
    # FIX - need to make sure we are handling all "possible" combinations of arguments.
    # Probably would be good if we could simplify/remove some of them.
    if source_url != "":
        downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded = \
            download_from_BroadInst(source=source_url,
                                    destination=args.destination_path,
                                    force_download=args.force_download)
        if not lib_was_downloaded:
            print("The library was downloaded previously. It was not downloaded again.")
        # Get the name out of the source's filename.
        source_filename_root = source_url.split("/")[-1].split(".")[0]
    else:
        # The path stored in the data table has to be an absolute path.
        genome_build_directory = os.path.realpath(args.destination_path)
        if not os.path.exists(genome_build_directory):
            raise ValueError("Cannot find the CTAT Genome Resource Library. " +
                             "The directory does not exist:\n\t{:s}".format(genome_build_directory))
        # FIX - Check if there is an actual CTAT Genome Resource Lib there.
        # _CTAT_BuildDir_Name

    print("\nThe location of the CTAT Genome Resource Library is {:s}.\n".format(genome_build_directory))

    # FIX - We should leave a file indicating build success the same way we do for download success.
    # build_the_library() decides between prep_genome_lib.pl and gmap_build only.
    if download_has_source_data or args.build or args.gmap_build:
        build_the_library(downloaded_directory, genome_build_directory, args.build, args.gmap_build)

    # Determine the display_name for the library.
    if not args.display_name:
        if source_filename_root:
            # Get the name out of the source filename.
            display_name = _CTAT_ResourceLib_DisplayNamePrefix + source_filename_root
        else:
            display_name = _CTAT_ResourceLib_DisplayNamePrefix + _CTAT_ResourceLib_DefaultGenome
            print("WARNING: We do not have a genome name. Using a default name, that might not be correct.")
    else:
        display_name = _CTAT_ResourceLib_DisplayNamePrefix + args.display_name
    display_name = display_name.replace(" ", "_")
    print("The Genome Name will be set to: {:s}\n".format(display_name))

    # Create a unique_id for the library.
    datetime_stamp = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S_%f")
    if source_filename_root:
        unique_id = source_filename_root + datetime_stamp
    elif downloaded_directory:
        unique_id = os.path.basename(downloaded_directory).split(".")[0] + datetime_stamp
    else:
        unique_id = _CTAT_ResourceLib_DefaultGenome + datetime_stamp

    print("The Resource Lib's display_name will be set to: {:s}\n".format(display_name))
    print("Its unique_id will be set to: {:s}\n".format(unique_id))
    print("Its dir_path will be set to: {:s}\n".format(genome_build_directory))

    data_table_entry = dict(value=unique_id, name=display_name, path=genome_build_directory)
    data_manager_dict = {'data_tables': {'ctat_genome_resource_libs': [data_table_entry]}}

    # Temporarily the output file's dictionary is written for debugging:
    print("The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict)))
    # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager,
    # which then puts it into the correct .loc file (I think).
    # Comment out the following lines when testing without galaxy package.
    with open(args.output_filename, 'w') as output_file:
        output_file.write(to_json_string(data_manager_dict))


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/add_ctat_resource_lib.xml Tue May 01 12:36:56 2018 -0400 @@ -0,0 +1,109 @@
<tool id="ctat_genome_resource_lib_data_manager"
    name="CTAT Genome Resource Library Data Manager"
    version="1.0.0" tool_type="manage_data">
    <description>Retrieve, and/or specify the location of, a CTAT Genome Resource Library.
    </description>
    <requirements>
        <requirement type="package" version="2.7">python</requirement>
        <requirement type="package" version="0.5.0">fusion-filter</requirement>
        <!-- gmap-fusion used to be required in order to process downloaded libraries
             to create all of the required files and indexes. It includes gmap
             and FusionFilter, programs from both of which are needed.
             Now there is a bioconda FusionFilter recipe. Lets try using that instead.
        <requirement type="package" version="0.3.0">gmap-fusion</requirement>
        -->
    </requirements>
    <!-- Options are only added to the command line when the matching input is set.
         The source url is passed with an extra pair of literal quote characters.
    -->
    <command detect_errors="default">
    <![CDATA[
        python $__tool_directory__/add_ctat_resource_lib.py
            --display_name "${display_name}"
            --destination_path "${destination}"
            --output_filename "${out_file}"
            #if str( $download_question.download ) == "true":
                --source_url "\"${download_question.source_url}\""
                #if str( ${download_question.force_download} ) == "true":
                    --force_download
                #end if
            #end if
            #if str( ${rebuild} ) == "true":
                --build
            #end if
            #if str( ${gmap_build} ) == "true":
                --gmap_build
            #end if
    ]]>
    </command>
    <inputs>
        <!-- The following are left in here, just as examples of various ways of doing options.
            <param name="force_download" type="boolean" checked="false"
                truevalue="- -force_download" falsevalue="" label="Force New Download? (yes/no)" />
            <param name="download" type="select" label="Need to Download?">
                <option value="single" selected="true">Single Dataset</option>
                <option value="paired_collection">Paired Collection</option>
            <when value="paired_collection">
                <param name="fastq_input" format="fastqsanger" type="data_collection" collection_type="paired" label="Select dataset pair" help="Specify paired dataset collection containing paired reads"/>
            </when>
        -->
        <conditional name="download_question">
            <param name="download" type="boolean" checked="false" label="Need to Download?" />
            <when value="true">
                <!-- The use of a code block to get dynamic options is now deprecated and discouraged.
                     I am still using it here. The only other way I can think of to do this is to
                     create another data_manager that gets the list of files and puts them into a
                     data_table, that is then used to get the filenames. That would require the admin
                     to first run the data_manager that builds the filename data_table before running
                     this data_manager.
                     This is the dynamic way to get the options filled.
                <param name="filename" type="select" label="Select File" display="radio"
                    dynamic_options="get_ctat_genome_filenames()"
                    help="Select a CTAT Genome Resource Library to Download." />
                     Here is the static method for what is online in April 2017:
                <param name="filename" type="select" label="Choose which library to download.">
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz">
                        GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
                    </option>
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.source_data.tar.gz">
                        GRCh37_v19_CTAT_lib_Feb092018.source_data.tar.gz
                    </option>
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz">
                        GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz
                    </option>
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_v27_CTAT_lib_Feb092018.source_data.tar.gz">
                        GRCh38_v27_CTAT_lib_Feb092018.source_data.tar.gz
                    </option>
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/Mouse_M16_CTAT_lib_Feb202018.plug-n-play.tar.gz">
                        Mouse_M16_CTAT_lib_Feb202018.plug-n-play.tar.gz
                    </option>
                    <option value="https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz">
                        Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
                    </option>
                -->
                <param name="source_url" type="select" label="Select a File"
                    dynamic_options="get_ctat_genome_urls()"
                    help="Select a CTAT Genome Resource Library to Download." />
                <param name="force_download" type="boolean" checked="false" label="Force New Download?" />
            </when>
            <!-- Nothing more is asked when no download is needed, but the conditional
                 still needs a case for the unchecked (default) state of the test parameter.
            -->
            <when value="false" />
        </conditional>

        <param name="display_name" type="text" label="Reference Genome Display Name" />
        <param name="destination" type="text" label="Local Destination (full path)" />
        <param name="rebuild" type="boolean" checked="false" label="Force rebuild of Library?" />
        <param name="gmap_build" type="boolean" checked="false" label="Do a gmap_build on the Library?" />
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <help>
        Retrieve, and/or specify the location of, a CTAT Genome Resource Library.
        When download is true, the files at https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/
        are used as selectors for the user to choose among.
        Specify the Full Path of the location where the CTAT Resource Library should be placed.
        You will need approximately 60GB of space for this library.
        If you already have the library, specify the full path of the location where it exists and leave the download box unchecked.
        The Reference Genome name may be left empty if downloading. The name will be used as the selector text of the entry in the data table.
        For more information on CTAT Genome Resource Libraries, see the FusionFilter wiki: https://github.com/FusionFilter/FusionFilter/wiki
    </help>
    <!-- The code file supplies get_ctat_genome_urls() for the dynamic_options above. -->
    <code file="add_ctat_resource_lib.py" />
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Tue May 01 12:36:56 2018 -0400 @@ -0,0 +1,38 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/add_ctat_resource_lib.xml" id="ctat_genome_resource_lib_data_manager"> + <data_table name="ctat_genome_resource_libs"> + <output> + <column name="value" /> + <!-- value is used to uniquely identify this entry in the table. + --> + <column name="name" /> + <!-- name is used as the selector in the pull down lists for items in this table. + --> + <column name="path" /> + <!-- path is the absolute path of the top level directory of the CTAT Genome Resource Library. + --> + <!-- <column name="path" output_ref="out_file"> --> + <!-- It is typical to move the data file, but because our tool gets the destination + location from the user, we do not want to move the data from that location. + The full path of the CTAT Resource library is returned in location. + So no need to change the value either. + The files are so big we do not want to be making copies of them. + They are created where we want them. + --> + <!-- <move type="file" relativize_symlinks="False"> --> + <!--<source>${path}</source> --> + <!--<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">ctat_genome_lib_build_dir</target> --> + <!--</move> --> + <!-- + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/ctat_genome_lib_build_dir + </value_translation> + --> + <!-- The location returned by the tool should already be an absolute path. + <value_translation type="function">abspath</value_translation> + --> + <!--</column> --> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/ctat_genome_resource_libs.loc.sample Tue May 01 12:36:56 2018 -0400 @@ -0,0 +1,15 @@ +# This file lists the locations of CTAT Genome Resource Libraries +# Usually there will only be one library, but it is conceivable +# that there could be multiple libraries. +# This file format is as follows +# (white space characters are TAB characters): +# +#<value> <name> <path> +# value is a unique id +# name is the display name +# path is the directory where the genome resource lib files are stored +# +#ctat_genome_resource_libs.loc could look like: +# +#GRCh38_v27_CTAT_lib_Feb092018 CTAT_GenomeResourceLib_GRCh38_v27_CTAT_lib_Feb092018 /path/to/ctat/genome/resource/lib/directory +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Tue May 01 12:36:56 2018 -0400 @@ -0,0 +1,6 @@ +<tables> + <table name="ctat_genome_resource_libs" comment_char="#" allow_duplicate_entries="False"> + <columns>value, name, path</columns> + <file path="tool-data/ctat_genome_resource_libs.loc" /> + </table> +</tables>