view data_manager/data_manager_vep_cache_download.py @ 0:9ae9c07fc6c6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_vep_cache_downloader commit 2db33cd5bcf5e2d7e3a43f11855c4cfc3b1b9f56
author iuc
date Wed, 11 May 2022 13:01:51 +0000
parents
children
line wrap: on
line source

#!/usr/bin/env python

import json
import os
import re
import sys
import tarfile
from urllib.request import urlretrieve


def _strip_suffix(text, suffix=".tar.gz"):
    """Return *text* without a trailing *suffix*; unchanged if it is absent."""
    # NOTE: str.strip(".tar.gz") must not be used here - it removes any of the
    # characters ". t a r g z" from BOTH ends, which mangles names such as
    # "gallus_gallus_vep_106_GRCg6a.tar.gz" -> "llus_gallus_vep_106_GRCg6".
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text


def _parse_cache_file_name(file_name):
    """Split a cache archive name into ``(species, cache_type, version)``.

    The name is expected to look like
    ``<species>[_merged|_refseq]_vep_<version>_<rest>``; ``cache_type`` is
    ``"default"`` when neither ``merged`` nor ``refseq`` is present.

    Raises:
        ValueError: if *file_name* does not follow that pattern.
    """
    match = re.search(r"(.*?)(merged|refseq)?_vep_(\d+?)_", file_name)
    if match is None:
        raise ValueError(
            "Cannot parse species/cache type/version from file name %r; expected "
            "something like '<species>[_merged|_refseq]_vep_<version>_<assembly>.tar.gz'"
            % file_name
        )
    species = match.group(1).rstrip("_")
    cache_type = match.group(2) or "default"
    version = match.group(3)
    return species, cache_type, version


def main():
    """Download and unpack a VEP cache archive and emit its data table entry.

    ``sys.argv[1]`` is a JSON file that is read for the job parameters
    (``param_dict`` with ``url``, ``file_name`` and ``dbkey``, and
    ``output_data[0]['extra_files_path']`` as the extraction directory) and is
    afterwards overwritten with the ``vep_versioned_annotation_cache`` data
    table entry describing the downloaded cache.

    Raises:
        ValueError: if ``file_name`` does not match the expected naming scheme.
    """
    # Read in given out_file
    with open(sys.argv[1]) as fh:
        params = json.load(fh)
    param_dict = params['param_dict']
    file_name = param_dict['file_name']
    dbkey = param_dict['dbkey']
    target_directory = params['output_data'][0]['extra_files_path']

    # Process parameters for metadata and file download. Parsing happens
    # before anything is created on disk so a bad file name fails early.
    species, cache_type, version = _parse_cache_file_name(file_name)
    url = param_dict['url'].rstrip("/") + "/" + file_name.lstrip("/")
    type_suffix = "" if cache_type == "default" else ", " + cache_type.capitalize()
    display_name = f"{species.capitalize().replace('_', ' ')} {dbkey} (V{version}{type_suffix})"
    cache_id = _strip_suffix(file_name)

    # Create target directory for file download (tolerate an existing one)
    os.makedirs(target_directory, exist_ok=True)

    # Download and extract given cache archive, remove archive afterwards
    final_file, _headers = urlretrieve(url, os.path.join(target_directory, file_name))
    try:
        with tarfile.open(final_file, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # The archive comes from a remote server: where the interpreter
                # supports extraction filters, refuse members that would escape
                # target_directory (absolute paths, "..", unsafe links).
                tar.extractall(target_directory, filter="data")
            else:
                tar.extractall(target_directory)
    finally:
        # Never leave the (large) archive behind, even if extraction failed.
        os.remove(final_file)

    # Construct metadata for the new data table entry
    data_manager_dict = {
        'data_tables': {
            'vep_versioned_annotation_cache': [
                {
                    'value': cache_id,
                    'dbkey': dbkey,
                    'version': version,
                    'cachetype': cache_type,
                    'name': display_name,
                    'species': species,
                    'path': './%s' % cache_id
                }
            ]
        }
    }

    # Save metadata to out_file
    with open(sys.argv[1], 'w') as fh:
        json.dump(data_manager_dict, fh, sort_keys=True)


# Script entry point: run the download only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()