0
|
1 #!/usr/bin/env python
|
|
2 # ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
|
|
3
|
|
# Rewritten by H.E. Cicada Brokaw Dennis from source downloaded from the toolshed.
# Currently only a single CTAT Resource Library can be downloaded. Eventually this
# should be extended to let the user select which library/location to download, but
# that would require the download tool to generate the list of available libraries
# on the fly.
# Users can create other libraries locally and use this tool to register them if
# they don't want to add them by hand.
|
|
11
|
|
12 import argparse
|
|
13 import os
|
|
14 import tarfile
|
|
15 import urllib
|
|
16
|
|
17 from galaxy.util.json import from_json_string, to_json_string
|
|
18
|
|
19 # The following was used by prior program to get input parameters from the json.
|
|
20 # Just leaving here for reference.
|
|
21 #def get_reference_id_name(params):
|
|
22 # genome_id = params['param_dict']['genome_id']
|
|
23 # genome_name = params['param_dict']['genome_name']
|
|
24 # return genome_id, genome_name
|
|
25 #
|
|
26 #def get_url(params):
|
|
27 # trained_url = params['param_dict']['trained_url']
|
|
28 # return trained_url
|
|
29
|
|
def download_from_BroadInst(destination, url=None):
    """Download a CTAT Resource Library tarball and unpack it under *destination*.

    Args:
        destination: Directory to download into. It is made absolute and
            created (with parents) if missing. An existing directory is
            accepted only when it is empty.
        url: Location of the ``.tar.gz`` archive. Defaults to the
            GRCh38/gencode v26 plug-n-play library at the Broad Institute.

    Returns:
        The absolute path of the extracted library: the archive's single
        top-level directory when it has exactly one, otherwise *destination*.

    Raises:
        OSError: If *destination* exists and is not an empty directory.
        ValueError: If an archive member would be written outside *destination*.
        Any error from the download or extraction is reported on stderr and
        re-raised unchanged.
    """
    import sys
    # urlretrieve moved between Python 2 and Python 3; import it locally so
    # this function works under either interpreter.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    ctat_resource_lib = 'https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh38_gencode_v26_CTAT_lib_Nov012017.plug-n-play.tar.gz'
    if url is None:
        url = ctat_resource_lib

    # Always work with a fully specified path so the returned location is
    # independent of the current working directory.
    destination = os.path.abspath(destination)
    if os.path.isdir(destination):
        if os.listdir(destination):
            raise OSError("Download directory is not empty: %s" % destination)
    else:
        os.makedirs(destination)
    # FIX - Can we also check that there is enough space on the device?

    archive_name = os.path.basename(url.split('?', 1)[0]) or os.path.basename(ctat_resource_lib)
    full_filepath = os.path.join(destination, archive_name)

    top_level = set()
    try:
        urlretrieve(url, full_filepath)
        with tarfile.open(full_filepath, mode='r:*') as archive:
            dest_root = os.path.realpath(destination)
            for member in archive.getmembers():
                # Refuse members that would escape the destination directory.
                target = os.path.realpath(os.path.join(dest_root, member.name))
                if target != dest_root and not target.startswith(dest_root + os.sep):
                    raise ValueError("Unsafe path in archive: %s" % member.name)
                first_part = os.path.normpath(member.name).split(os.sep)[0]
                if first_part not in ('', '.'):
                    top_level.add(first_part)
            # Unpack next to the tarball, not into the current working directory.
            archive.extractall(path=destination)
    except Exception as err:
        # Say what failed before letting the caller see the original exception.
        sys.stderr.write("Failed to download or unpack %s into %s: %s\n" % (url, destination, err))
        raise

    # FIX - There is additional processing that needs to happen for gmap-fusion to work.
    # The root directory of the extracted archive is the location that needs to
    # be registered as the ctat_genome_resource_library.
    if len(top_level) == 1:
        extracted_root = os.path.join(destination, top_level.pop())
        if os.path.isdir(extracted_root):
            return extracted_root
    return destination
|
|
49
|
|
def main():
    """Register a CTAT Resource Library in the Galaxy data table.

    Parses the command line, optionally downloads the library into
    ``--destination_path``, and writes a JSON dictionary describing one
    ``ctat_genome_ref_libs`` data table entry to ``--output_filename``.
    """
    # Standard-library equivalent of the legacy galaxy to_json_string alias.
    import json

    # Parse Command Line
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--download', action="store_true",
                        help='Do not use if you already have the CTAT Resource Library that this program downloads.')
    parser.add_argument('-g', '--genome_name', default="GRCh38_gencode_v26",
                        help='Is used as the selector text of the entry in the data table.')
    parser.add_argument('-p', '--destination_path', required=True,
                        help='Full path of the CTAT Resource Library location or destination.')
    parser.add_argument('-o', '--output_filename', required=True,
                        help='Name of the output file, where the json dictionary will be written.')
    args = parser.parse_args()

    # All of the input parameters are written by default to the output file prior to
    # this program being called, but the values used here come from the command line.

    if args.download:
        downloaded_path = download_from_BroadInst(destination=args.destination_path)
        # Fall back to the requested destination if the download helper does
        # not report where the library was extracted.
        ctat_genome_resource_lib_path = downloaded_path or args.destination_path
    else:
        # FIX - probably should check if this is a valid path with an actual CTAT Genome Ref Lib there.
        ctat_genome_resource_lib_path = args.destination_path

    genome_name = args.genome_name or "GRCh38_gencode_v26"

    # Set the value to the basename of the directory path minus the extension.
    # normpath drops any trailing separator, which would otherwise make the
    # basename (and therefore the table value) empty.
    # FIX - Need to make sure is unique. This is not good way to do it. Just doing it this way now for testing.
    normalized_path = os.path.normpath(ctat_genome_resource_lib_path)
    table_entry_value = os.path.basename(normalized_path).split(".")[0]

    data_table_entry = dict(value=table_entry_value, name=genome_name, path=ctat_genome_resource_lib_path)
    data_manager_dict = {'data_tables': {'ctat_genome_ref_libs': [data_table_entry]}}

    # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager,
    # which then puts it into the correct .loc file (I think).
    # Text mode, because json.dumps returns str; the with-block closes the file.
    with open(args.output_filename, 'w') as output_file:
        output_file.write(json.dumps(data_manager_dict))
|
|
93
|
|
# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
|
96
|