# HG changeset patch
# User iuc
# Date 1487097864 18000
# Node ID f5e3438468c7172b56d0bdf5d654c602d9fc39c3
# Parent b418349edb0d8e55db52d1b802d5ea888bcbaeca
Uploaded

diff -r b418349edb0d -r f5e3438468c7 .shed.yml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Tue Feb 14 13:44:24 2017 -0500
@@ -0,0 +1,12 @@
+categories:
+- Data Managers
+description: Data Manager for installing PlantTribes scaffolds data
+homepage_url: http://amborella.huck.psu.edu/
+long_description: |
+  PlantTribes is a collection of automated modular analysis pipelines that utilize objective
+  classifications of complete protein sequences from sequenced plant genomes to perform
+  comparative evolutionary studies.
+name: data_manager_plant_tribes_scaffolds_downloader
+owner: iuc
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader
+type: unrestricted
diff -r b418349edb0d -r f5e3438468c7 data_manager/data_manager_plant_tribes_scaffolds_download.py
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.py	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager/data_manager_plant_tribes_scaffolds_download.py	Tue Feb 14 13:44:24 2017 -0500
@@ -31,9 +31,23 @@
         shutil.rmtree(dir)
 
 
-def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
-    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
-    make_directory(work_directory)
+def extract_archive(file_path, work_directory):
+    if tarfile.is_tarfile(file_path):
+        fh = tarfile.open(file_path, 'r:*')
+    elif zipfile.is_zipfile(file_path):
+        fh = zipfile.ZipFile(file_path, 'r')
+    else:
+        return
+    fh.extractall(work_directory)
+
+
+def move_files(work_directory, target_directory):
+    # Move the files into the defined output directory.
+    for filename in os.listdir(work_directory):
+        shutil.move(os.path.join(work_directory, filename), target_directory)
+
+
+def url_download(url, work_directory):
     file_path = os.path.join(work_directory, os.path.basename(url))
     src = None
     dst = None
@@ -54,26 +68,48 @@
             src.close()
         if dst:
             dst.close()
-    if tarfile.is_tarfile(file_path):
-        fh = tarfile.open(file_path, 'r:*')
-    elif zipfile.is_zipfile(file_path):
-        fh = zipfile.ZipFile(file_path, 'r')
-    else:
-        return
-    fh.extractall(work_directory)
+    return file_path
+
+
+def download(target_file_path, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
+    data_manager_dict = {}
+    data_table_entry = {}
+    # Download the scaffolds data.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
+    make_directory(work_directory)
+    file_path = url_download(web_url, work_directory)
+    extract_archive(file_path, work_directory)
     os.remove(file_path)
     # Move the scaffolds data files into defined output directory.
-    for filename in os.listdir(work_directory):
-        shutil.move(os.path.join(work_directory, filename), target_directory)
+    target_directory = make_directory(target_file_path)
+    move_files(work_directory, target_directory)
     remove_directory(work_directory)
-    data_manager_dict = {}
-    # Populate the data table, there should be a single entry in target_directory.
+    # Populate the data_manager_dict with the scaffolds data entry.
     for file_path in os.listdir(target_directory):
         full_path = os.path.abspath(os.path.join(target_directory, file_path))
         entry_name = "%s" % os.path.basename(file_path)
-        data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description)
-        for data_table_name in data_table_names:
-            data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
+        data_table_entry['value'] = entry_name
+        data_table_entry['name'] = entry_name
+        data_table_entry['path'] = full_path
+        data_table_entry['description'] = description
+    # Download the default configuration files.
+    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
+    make_directory(work_directory)
+    file_path = url_download(config_web_url, work_directory)
+    extract_archive(file_path, work_directory)
+    os.remove(file_path)
+    shutil.rmtree(target_directory)
+    # Move the scaffolds data files into defined output directory.
+    target_directory = make_directory(target_file_path)
+    move_files(work_directory, target_directory)
+    remove_directory(work_directory)
+    # Populate the data_manager_dict with the default configs entry.
+    for file_path in os.listdir(target_directory):
+        full_path = os.path.abspath(os.path.join(target_directory, file_path))
+        data_table_entry['config_path'] = full_path
+    # Populate the data_manager_dict.
+    for data_table_name in data_table_names:
+        data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
     return data_manager_dict
@@ -81,15 +117,16 @@
 parser.add_argument('--description', dest='description', default=None, help='Description')
 parser.add_argument('--name', dest='name', help='Data table entry unique ID')
 parser.add_argument('--out_file', dest='out_file', help='JSON output file')
-parser.add_argument('--web_url', dest='web_url', help='Web URL')
+parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
+parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')

 args = parser.parse_args()

 # Some magic happens with tools of type "manage_data" in that the output
 # file contains some JSON data that allows us to define the target directory.
 params = json.loads(open(args.out_file).read())
-target_directory = params['output_data'][0]['extra_files_path']
-make_directory(target_directory)
+target_file_path = params['output_data'][0]['extra_files_path']
+

 if args.description is None:
     description = ''
@@ -97,7 +134,7 @@
     description = args.description.strip()

 # Get the scaffolds data.
-data_manager_dict = url_download(target_directory, args.web_url, description)
+data_manager_dict = download(target_file_path, args.web_url, args.config_web_url, description)
 # Write the JSON output dataset.
 fh = open(args.out_file, 'wb')
 fh.write(json.dumps(data_manager_dict))
diff -r b418349edb0d -r f5e3438468c7 data_manager/data_manager_plant_tribes_scaffolds_download.xml
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.xml	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager/data_manager_plant_tribes_scaffolds_download.xml	Tue Feb 14 13:44:24 2017 -0500
@@ -1,4 +1,4 @@
-
+
@@ -7,16 +7,18 @@
-
+
+
@@ -28,13 +30,20 @@

 **What it does**

-This tool fetches scaffolds data used by the PlantTribes Galaxy tools and populates the plant_tribes_scaffolds data table.
-The scaffolds data can be imported using a URL, and an optional description can be provided that will appear next to the
-scaffolds file name in the data table entry.
-Scaffolds data provided by the Floral Genome Project can be downloaded here:
+This tool fetches scaffolds data and default configuration files used by the PlantTribes Galaxy tools and populates the
+plant_tribes_scaffolds data table. Both the scaffolds data and the default configuration files can be imported using a
+URL, and an optional description can be provided that will appear next to the scaffolds file name in the data table entry.
+
+Scaffolds data provided by the Floral Genome Project can be downloaded using these URLs:

  * 22 plant genomes (Angiosperms clusters, version 1.0): http://fgp.huck.psu.edu/planttribes_data/22Gv1.0.tar.bz2
  * 22 plant genomes (Angiosperms clusters, version 1.1): http://fgp.huck.psu.edu/planttribes_data/22Gv1.1.tar.bz2

+Default configuration files provided by the Floral Genome Project can be downloaded using these URLs:
+
+ * 22 plant genomes (Angiosperms clusters, version 1.0): http://fgp.huck.psu.edu/planttribes_config/22Gv1.0.tar.bz2
+ * 22 plant genomes (Angiosperms clusters, version 1.1): http://fgp.huck.psu.edu/planttribes_config/22Gv1.1.tar.bz2
+
+
@@ -43,7 +52,7 @@
             title = {None},
             year = {None},
             eprint = {None},
-            url = {None}
+            url = {https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader}
             }
diff -r b418349edb0d -r f5e3438468c7 data_manager_conf.xml
--- a/data_manager_conf.xml	Fri Jan 13 10:32:14 2017 -0500
+++ b/data_manager_conf.xml	Tue Feb 14 13:44:24 2017 -0500
@@ -14,6 +14,14 @@
         abspath
+
+
+        ${config_path}
+        plant_tribes/configs/${value}
+
+        ${GALAXY_DATA_MANAGER_DATA_PATH}/plant_tribes/configs/${value}
+        abspath
+
+
diff -r b418349edb0d -r f5e3438468c7 tool-data/plant_tribes_scaffolds.loc.sample
--- a/tool-data/plant_tribes_scaffolds.loc.sample	Fri Jan 13 10:32:14 2017 -0500
+++ b/tool-data/plant_tribes_scaffolds.loc.sample	Tue Feb 14 13:44:24 2017 -0500
@@ -1,4 +1,4 @@
 ## Plant Tribes scaffolds
-#Value    Name    Path    Description
-#22Gv1.0    22Gv1.0    /plant_tribes/scaffolds/22Gv1.0    22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
-#22Gv1.1    22Gv1.1    /plant_tribes/scaffolds/22Gv1.1    22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
+#Value    Name    Path    Description    Config Path
+#22Gv1.0    22Gv1.0    /plant_tribes/scaffolds/22Gv1.0    22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)    /plant_tribes/configs/22Gv1.0
+#22Gv1.1    22Gv1.1    /plant_tribes/scaffolds/22Gv1.1    22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)    /plant_tribes/configs/22Gv1.1
diff -r b418349edb0d -r f5e3438468c7 tool_data_table_conf.xml.sample
--- a/tool_data_table_conf.xml.sample	Fri Jan 13 10:32:14 2017 -0500
+++ b/tool_data_table_conf.xml.sample	Tue Feb 14 13:44:24 2017 -0500
@@ -1,6 +1,6 @@
-        value, name, path, description
+        value, name, path, description, config_path
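For orientation, a minimal sketch (not part of the changeset) of the flow the refactored script follows and of the JSON it writes back to the --out_file dataset. The directory paths and entry values below are hypothetical examples, and the "data_tables" layout assumes the conventional Galaxy data manager JSON format; only the column names (value, name, path, description, config_path) come directly from this patch.

# Sketch only: download -> extract_archive -> move_files, then one
# plant_tribes_scaffolds entry whose keys mirror the loc.sample columns.
import json

entry = {
    "value": "22Gv1.1",
    "name": "22Gv1.1",
    "path": "/galaxy/tool-data/plant_tribes/scaffolds/22Gv1.1",        # hypothetical path
    "description": "22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)",
    "config_path": "/galaxy/tool-data/plant_tribes/configs/22Gv1.1",   # hypothetical path
}

# Assumed conventional shape of the data manager output: a "data_tables"
# mapping from table name to a list of entries.
data_manager_dict = {"data_tables": {"plant_tribes_scaffolds": [entry]}}

with open("out_file.json", "w") as fh:  # stands in for args.out_file
    fh.write(json.dumps(data_manager_dict))

Galaxy reads this JSON and appends the entry's columns to the plant_tribes_scaffolds table, so the path and config_path values end up as the Path and Config Path columns shown in plant_tribes_scaffolds.loc.sample above.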