view data_manager/fetch_vep_cache_data.py @ 2:17c98d091710 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:19:31 -0400
parents
children
line wrap: on
line source

import argparse
import os
import json
import re
import pprint
import subprocess
import sys

def resolve_species(species, species_type):
    """Translate the tool's species selection into vep_install arguments.

    Args:
        species: Species as passed on the command line. Human entries carry
            the assembly as a suffix ending in "37" or "38".
        species_type: One of "ensembl", "refseq" or "merged".

    Returns:
        A ``(vep_species, assembly)`` tuple. ``vep_species`` is the value for
        ``--SPECIES`` (with a ``_refseq``/``_merged`` suffix when applicable);
        ``assembly`` is "GRCh37", "GRCh38", or None when no ``--ASSEMBLY``
        argument should be passed.

    Raises:
        ValueError: If a human species does not end in "37" or "38".
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        # The assembly suffix is only a selector; the installer is given the
        # bare species name plus an explicit --ASSEMBLY.
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)
    return species, assembly


def build_install_command(output_dir, species, assembly=None):
    """Return the vep_install invocation as an argument list.

    A list (rather than a shell string) keeps paths with spaces or shell
    metacharacters intact and prevents them from being interpreted by a shell.

    Args:
        output_dir: Directory passed as ``--CACHEDIR``.
        species: Value passed as ``--SPECIES``.
        assembly: Optional value for ``--ASSEMBLY``; omitted when falsy.

    Returns:
        list of str suitable for ``subprocess.call`` without ``shell=True``.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", output_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def build_data_table(species, output_dir, dbkey, species_type):
    """Build the dictionary that is written as JSON to the output file.

    It holds a single ``vep_cache_data`` row under ``data_tables``.
    """
    return {
        "data_tables": {
            "vep_cache_data": [{
                "value": species,
                "path": output_dir,
                "dbkey": dbkey,
                "type": species_type,
                "name": "{0} ({1})".format(dbkey, species_type),
            }],
        },
    }


def main(argv=None):
    """Download a VEP cache with vep_install and write the data table JSON.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The process exit status: 1 for an unknown human assembly, otherwise
        the exit status of vep_install (127 if it could not be started).
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # These three are dereferenced unconditionally below, so let argparse
    # reject a missing one with a usage error instead of crashing later.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as error:
        print(str(error))
        return 1

    # The output file is read before it is overwritten at the end of the run;
    # its content is only echoed to the log here.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    vep_install_cmd = build_install_command(output_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    try:
        exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)
    except OSError as error:
        # Without a shell, a missing or non-executable vep_install raises
        # instead of returning a status; report it with the conventional
        # "command not found" code the shell would have produced.
        sys.stderr.write("Could not run vep_install: {0}\n".format(error))
        exit_code = 127

    print(exit_code)

    output_dict = build_data_table(species, output_dir, args.species, species_type)
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())