Mercurial > repos > trinity_ctat > ctat_genome_ref_lib_data_manager_test2
changeset 3:1bde47139a8d draft
Deleted selected files
author | trinity_ctat |
---|---|
date | Tue, 12 Dec 2017 15:17:21 -0500 |
parents | 65a69144e53d |
children | 07fc4bf0cff4 |
files | data_manager/add_ctat_ref_lib.py |
diffstat | 1 files changed, 0 insertions(+), 94 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/

# Rewritten by H.E. Cicada Brokaw Dennis from source downloaded from the toolshed.
# Eventually this should be modified to allow downloading of more than just the one library,
# to let the user select what library/location to download, but that would require the
# download tool to generate the list of libraries to download on the fly. Currently
# we are only using the one library.
# Users can create other ones locally and use this tool to add them if they don't want
# to add them by hand.
"""Register a CTAT Genome Resource Library in a Galaxy data table.

The script optionally downloads and unpacks the library, then writes a JSON
dictionary describing one ``ctat_genome_ref_libs`` data table entry to the
output file named on the command line.
"""

import argparse
import json
import os
import sys
import tarfile

try:  # Python 3
    from urllib.request import urlretrieve
except ImportError:  # Python 2
    from urllib import urlretrieve

CTAT_RESOURCE_LIB_URL = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_Nov012017.plug-n-play.tar.gz'
DEFAULT_GENOME_NAME = "GRCh38_gencode_v26"
DATA_TABLE_NAME = 'ctat_genome_ref_libs'


def _checked_members(archive, destination, top_level_names):
    """Yield the members of *archive*, rejecting any that escape *destination*.

    The first path component of every member is added to *top_level_names*
    so the caller can tell which directory the archive unpacked into.
    Members are validated lazily, one at a time, so the archive is still
    read in a single pass.

    Raises:
        ValueError: if a member would be written outside *destination*.
    """
    root = os.path.realpath(destination)
    for member in archive:
        target = os.path.realpath(os.path.join(root, member.name))
        if target != root and not target.startswith(root + os.sep):
            raise ValueError(
                "Refusing to extract '{0}' outside of '{1}'".format(member.name, destination))
        relative = os.path.relpath(target, root)
        if relative != os.curdir:
            top_level_names.add(relative.split(os.sep)[0])
        yield member


def download_from_BroadInst(destination, url=CTAT_RESOURCE_LIB_URL):
    """Download the CTAT resource library archive and unpack it.

    Args:
        destination: Directory to download into. It is made absolute, created
            if missing (including parents), and must be empty if it exists.
        url: Location of the tar archive. Defaults to the Broad Institute
            GRCh38 gencode v26 plug-n-play library.

    Returns:
        The absolute path of the extracted library: the archive's single
        top-level directory when it has exactly one, otherwise *destination*.

    Raises:
        OSError: if *destination* exists and is not empty, or cannot be created.
        ValueError: if an archive member would be extracted outside *destination*.
    """
    destination = os.path.abspath(destination)
    if os.path.isdir(destination):
        if os.listdir(destination):
            raise OSError(
                "Download directory '{0}' already exists and is not empty.".format(destination))
    else:
        os.makedirs(destination)

    archive_name = url.rstrip('/').rsplit('/', 1)[-1] or 'ctat_resource_lib.tar.gz'
    full_filepath = os.path.join(destination, archive_name)

    top_level_names = set()
    try:
        urlretrieve(url, full_filepath)
        with tarfile.open(full_filepath, mode='r:*') as archive:
            # Extract into the destination (not the current working directory).
            archive.extractall(
                path=destination,
                members=_checked_members(archive, destination, top_level_names))
    except Exception as err:
        # Report what failed before re-raising so the Galaxy job log is useful.
        sys.stderr.write(
            "Failed to download or extract '{0}' into '{1}': {2}\n".format(url, destination, err))
        raise

    # FIX - There is additional processing that needs to happen for gmap-fusion to work.
    # The extracted root directory is what must be registered as the library path.
    if len(top_level_names) == 1:
        extracted_root = os.path.join(destination, next(iter(top_level_names)))
        if os.path.isdir(extracted_root):
            return extracted_root
    return destination


def main(argv=None):
    """Parse the command line and write the data manager JSON file.

    Args:
        argv: Optional list of command line arguments; defaults to
            ``sys.argv[1:]`` when ``None``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--download', action="store_true",
        help='Do not use if you already have the CTAT Resource Library that this program downloads.')
    parser.add_argument('-g', '--genome_name', default=DEFAULT_GENOME_NAME,
        help='Is used as the selector text of the entry in the data table.')
    parser.add_argument('-p', '--destination_path', required=True,
        help='Full path of the CTAT Resource Library location or destination.')
    parser.add_argument('-o', '--output_filename', required=True,
        help='Name of the output file, where the json dictionary will be written.')
    args = parser.parse_args(argv)

    # Galaxy writes the input parameters to the output file before calling this
    # program, but the values are taken from the command line instead.
    if args.download:
        ctat_genome_resource_lib_path = download_from_BroadInst(destination=args.destination_path)
    else:
        # FIX - probably should check if this is a valid path with an actual CTAT Genome Ref Lib there.
        ctat_genome_resource_lib_path = args.destination_path

    # An empty selector text falls back to the default genome name.
    genome_name = args.genome_name or DEFAULT_GENOME_NAME

    data_table_entry = dict(value="CTAT_RESOURCE_LIB", name=genome_name, path=ctat_genome_resource_lib_path)
    data_manager_dict = {'data_tables': {DATA_TABLE_NAME: [data_table_entry]}}

    # Save info to json file. This is used to transfer data from the DataManager tool,
    # to the data manager, which then puts it into the correct .loc file.
    with open(args.output_filename, 'w') as handle:
        handle.write(json.dumps(data_manager_dict))


if __name__ == "__main__":
    main()