Mercurial > repos > iuc > data_manager_plant_tribes_scaffolds_downloader
changeset 1:f5e3438468c7 draft
Uploaded
author | iuc |
---|---|
date | Tue, 14 Feb 2017 13:44:24 -0500 |
parents | b418349edb0d |
children | b3fc4b35e3f5 |
files | .shed.yml data_manager/data_manager_plant_tribes_scaffolds_download.py data_manager/data_manager_plant_tribes_scaffolds_download.xml data_manager_conf.xml tool-data/plant_tribes_scaffolds.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 101 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.shed.yml Tue Feb 14 13:44:24 2017 -0500 @@ -0,0 +1,12 @@ +categories: +- Data Managers +description: Data Manager for installing PlantTribes scaffolds data +homepage_url: http://amborella.huck.psu.edu/ +long_description: | + PlantTribes is a collection of automated modular analysis pipelines that utilize objective + classifications of complete protein sequences from sequenced plant genomes to perform + comparative evolutionary studies. +name: data_manager_plant_tribes_scaffolds_downloader +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader +type: unrestricted
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.py Fri Jan 13 10:32:14 2017 -0500 +++ b/data_manager/data_manager_plant_tribes_scaffolds_download.py Tue Feb 14 13:44:24 2017 -0500 @@ -31,9 +31,23 @@ shutil.rmtree(dir) -def url_download(target_directory, url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES): - work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds')) - make_directory(work_directory) +def extract_archive(file_path, work_directory): + if tarfile.is_tarfile(file_path): + fh = tarfile.open(file_path, 'r:*') + elif zipfile.is_zipfile(file_path): + fh = zipfile.ZipFile(file_path, 'r') + else: + return + fh.extractall(work_directory) + + +def move_files(work_directory, target_directory): + # Move the files into defined output directory. + for filename in os.listdir(work_directory): + shutil.move(os.path.join(work_directory, filename), target_directory) + + +def url_download(url, work_directory): file_path = os.path.join(work_directory, os.path.basename(url)) src = None dst = None @@ -54,26 +68,48 @@ src.close() if dst: dst.close() - if tarfile.is_tarfile(file_path): - fh = tarfile.open(file_path, 'r:*') - elif zipfile.is_zipfile(file_path): - fh = zipfile.ZipFile(file_path, 'r') - else: - return - fh.extractall(work_directory) + return file_path + + +def download(target_file_path, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES): + data_manager_dict = {} + data_table_entry = {} + # Download the scaffolds data. + work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds')) + make_directory(work_directory) + file_path = url_download(web_url) + extract_archive(file_path, work_directory) os.remove(file_path) # Move the scaffolds data files into defined output directory. 
- for filename in os.listdir(work_directory): - shutil.move(os.path.join(work_directory, filename), target_directory) + target_directory = make_directory(target_file_path) + move_files(work_directory, target_directory) remove_directory(work_directory) - data_manager_dict = {} - # Populate the data table, there should be a single entry in target_directory. + # Populate the data_manager_dict with the scaffolds data entry. for file_path in os.listdir(target_directory): full_path = os.path.abspath(os.path.join(target_directory, file_path)) entry_name = "%s" % os.path.basename(file_path) - data_table_entry = dict(value=entry_name, name=entry_name, path=full_path, description=description) - for data_table_name in data_table_names: - data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) + data_table_entry['value'] = entry_name + data_table_entry['name'] = entry_name + data_table_entry['path'] = full_path + data_table_entry['description'] = description + # Download the default configuration files. + work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs')) + make_directory(work_directory) + file_path = url_download(config_web_url) + extract_archive(file_path, work_directory) + os.remove(file_path) + shutil.rmtree(target_directory) + # Move the scaffolds data files into defined output directory. + target_directory = make_directory(target_file_path) + move_files(work_directory, target_directory) + remove_directory(work_directory) + # Populate the data_manager_dict with the default configs entry. + for file_path in os.listdir(target_directory): + full_path = os.path.abspath(os.path.join(target_directory, file_path)) + data_table_entry['config_path'] = full_path + # Populate the data_manager_dict. 
+ for data_table_name in data_table_names: + data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) return data_manager_dict @@ -81,15 +117,16 @@ parser.add_argument('--description', dest='description', default=None, help='Description') parser.add_argument('--name', dest='name', help='Data table entry unique ID') parser.add_argument('--out_file', dest='out_file', help='JSON output file') -parser.add_argument('--web_url', dest='web_url', help='Web URL') +parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds') +parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs') args = parser.parse_args() # Some magic happens with tools of type "manage_data" in that the output # file contains some JSON data that allows us to define the target directory. params = json.loads(open(args.out_file).read()) -target_directory = params['output_data'][0]['extra_files_path'] -make_directory(target_directory) +target_file_path = params['output_data'][0]['extra_files_path'] + if args.description is None: description = '' @@ -97,7 +134,7 @@ description = args.description.strip() # Get the scaffolds data. -data_manager_dict = url_download(target_directory, args.web_url, description) +data_manager_dict = download(target_file_path, args.web_url, args.config_web_url, description) # Write the JSON output dataset. fh = open(args.out_file, 'wb') fh.write(json.dumps(data_manager_dict))
--- a/data_manager/data_manager_plant_tribes_scaffolds_download.xml Fri Jan 13 10:32:14 2017 -0500 +++ b/data_manager/data_manager_plant_tribes_scaffolds_download.xml Tue Feb 14 13:44:24 2017 -0500 @@ -1,4 +1,4 @@ -<tool id="data_manager_plant_tribes_scaffolds_download" name="PlantTribes Scaffolds Download" version="1.0.0" tool_type="manage_data"> +<tool id="data_manager_plant_tribes_scaffolds_download" name="PlantTribes Scaffolds Download" version="1.1.0" tool_type="manage_data"> <description></description> <stdio> <exit_code range=":-1" level="fatal" description="Error: Cannot open file" /> @@ -7,16 +7,18 @@ <command> <![CDATA[ python $__tool_directory__/data_manager_plant_tribes_scaffolds_download.py - --name "$name" - --description "$description" - --web_url "$web_url" - --out_file "$out_file" + --name '$name' + --description '$description' + --web_url '$web_url' + --config_web_url '$config_web_url' + --out_file '$out_file' ]]> </command> <inputs> <param name="name" type="text" value="" label="Data table entry unique ID"/> <param name="description" type="text" value="" label="Description of the data" help="Value is optional"/> - <param name="web_url" type="text" value="" label="Web URL" optional="False" /> + <param name="web_url" type="text" value="" label="URL for downloading scaffolds" help="Must be same version as configs" optional="False" /> + <param name="config_web_url" type="text" value="" label="URL for downloading configs" help="Must be same version as scaffolds" optional="False" /> </inputs> <outputs> <data name="out_file" format="data_manager_json" /> @@ -28,13 +30,20 @@ **What it does** -This tool fetches scaffolds data used by the PlantTribes Galaxy tools and populates the plant_tribes_scaffolds data table. -The scaffolds data can be imported using a URL, and an optional description can be provided that will appear next to the -scaffolds file name in the data table entry. 
Scaffolds data provided by the Floral Genome Project can be downloaded here: +This tool fetches scaffolds data and default configuration files used by the PlantTribes Galaxy tools and populates the +plant_tribes_scaffolds data table. Both the scaffolds data and the default configuration files can be imported using a +URL, and an optional description can be provided that will appear next to the scaffolds file name in the data table entry. + +Scaffolds data provided by the Floral Genome Project can be downloaded using these URLs: * 22 plant genomes (Angiosperms clusters, version 1.0): http://fgp.huck.psu.edu/planttribes_data/22Gv1.0.tar.bz2 * 22 plant genomes (Angiosperms clusters, version 1.1): http://fgp.huck.psu.edu/planttribes_data/22Gv1.1.tar.bz2 +Default configuration files provided by the Floral Genome Project can be downloaded using these URLs: + + * 22 plant genomes (Angiosperms clusters, version 1.0): http://fgp.huck.psu.edu/planttribes_config/22Gv1.0.tar.bz2 + * 22 plant genomes (Angiosperms clusters, version 1.1): http://fgp.huck.psu.edu/planttribes_config/22Gv1.1.tar.bz2 + </help> <citations> <citation type="bibtex"> @@ -43,7 +52,7 @@ title = {None}, year = {None}, eprint = {None}, - url = {None} + url = {https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_plant_tribes_scaffolds_downloader} }</citation> </citations> </tool>
--- a/data_manager_conf.xml Fri Jan 13 10:32:14 2017 -0500 +++ b/data_manager_conf.xml Tue Feb 14 13:44:24 2017 -0500 @@ -14,6 +14,14 @@ <value_translation type="function">abspath</value_translation> </column> <column name="description" /> + <column name="config_path" output_ref="out_file"> + <move type="file"> + <source>${config_path}</source> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">plant_tribes/configs/${value}</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/plant_tribes/configs/${value}</value_translation> + <value_translation type="function">abspath</value_translation> + </column> </output> </data_table> </data_manager>
--- a/tool-data/plant_tribes_scaffolds.loc.sample Fri Jan 13 10:32:14 2017 -0500 +++ b/tool-data/plant_tribes_scaffolds.loc.sample Tue Feb 14 13:44:24 2017 -0500 @@ -1,4 +1,4 @@ ## Plant Tribes scaffolds -#Value Name Path Description -#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) -#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) +#Value Name Path Description Config Path +#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0) /plant_tribes/configs/22Gv1.0 +#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1) /plant_tribes/configs/22Gv1.1
--- a/tool_data_table_conf.xml.sample Fri Jan 13 10:32:14 2017 -0500 +++ b/tool_data_table_conf.xml.sample Tue Feb 14 13:44:24 2017 -0500 @@ -1,6 +1,6 @@ <tables> <table name="plant_tribes_scaffolds" comment_char="#"> - <columns>value, name, path, description</columns> + <columns>value, name, path, description, config_path</columns> <file path="tool-data/plant_tribes_scaffolds.loc" /> </table> </tables>