Mercurial > repos > dvanzessen > vep_emc
view data_manager/fetch_vep_cache_data.py @ 2:17c98d091710 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:19:31 -0400 |
| parents | |
| children |
line wrap: on
line source
"""Download/process a VEP cache via ``vep_install`` and record it as a data table entry.

The script reads the JSON parameter file given by ``--output-file``, runs
``vep_install`` into ``--output-dir`` and then overwrites ``--output-file``
with a ``data_tables`` JSON document describing the installed cache.
"""
import argparse
import os
import json
import re
import pprint
import subprocess
import sys

# Exit status reported when the installer executable cannot be started at all.
# 127 is what a POSIX shell reports for "command not found", which is what the
# previous shell-based invocation produced in that situation.
INSTALLER_NOT_STARTED = 127


def resolve_species(species, species_type):
    """Translate the requested species into installer arguments.

    Args:
        species: Species identifier from the command line. Human builds are
            expected as ``homo_sapiens...37`` or ``homo_sapiens...38``.
        species_type: One of ``"ensembl"``, ``"refseq"`` or ``"merged"``.

    Returns:
        A ``(species_name, assembly)`` tuple. ``species_name`` is the value for
        ``--SPECIES`` (suffixed with ``_refseq``/``_merged`` when requested) and
        ``assembly`` is ``"GRCh37"``, ``"GRCh38"`` or ``None`` when no explicit
        assembly has to be passed.

    Raises:
        ValueError: If a human species does not end in ``37`` or ``38``.
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        # The build suffix only selects the assembly; the installer itself is
        # given the plain species name.
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)
    return species, assembly


def build_install_command(cache_dir, species, assembly=None):
    """Return the ``vep_install`` invocation as an argument list.

    An argument list (rather than a shell string) keeps paths containing
    spaces or shell metacharacters intact and avoids shell injection.

    Args:
        cache_dir: Directory passed as ``--CACHEDIR``.
        species: Value passed as ``--SPECIES``.
        assembly: Optional value passed as ``--ASSEMBLY``; omitted when falsy.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", cache_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def build_data_table(species, output_dir, requested_species, species_type):
    """Build the ``data_tables`` document describing the installed cache.

    Args:
        species: Species name that was handed to the installer.
        output_dir: Cache directory exactly as given on the command line.
        requested_species: Unmodified ``--species`` argument (used as dbkey).
        species_type: Cache flavour (``ensembl``/``refseq``/``merged``).
    """
    return dict(
        data_tables=dict(
            vep_cache_data=[{
                "value": species,
                "path": output_dir,
                "dbkey": requested_species,
                "type": species_type,
                "name": "{0} ({1})".format(requested_species, species_type)
            }]
        )
    )


def main(argv=None):
    """Run the cache installation and write the data table JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The process exit status: ``1`` for an unknown human assembly,
        otherwise the exit status of ``vep_install``.
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # These three are dereferenced unconditionally below, so a missing one is
    # reported as a usage error instead of an AttributeError/TypeError.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as error:
        sys.stderr.write("{0}\n".format(error))
        return 1

    # The incoming parameter file is only echoed for logging; it is replaced
    # by the data table document at the end.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    # The installer runs with cwd set to the cache directory, so a relative
    # --CACHEDIR would be resolved inside itself; always hand it an absolute path.
    cache_dir = os.path.abspath(output_dir)
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)

    vep_install_cmd = build_install_command(cache_dir, species, assembly)
    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    # Flush so our log lines appear before the child's output.
    sys.stdout.flush()

    try:
        exit_code = subprocess.call(vep_install_cmd, cwd=cache_dir)
    except OSError as error:
        # Executable missing or not runnable: report it and fail with a
        # non-zero status rather than dying with a traceback.
        sys.stderr.write("Could not run vep_install: {0}\n".format(error))
        exit_code = INSTALLER_NOT_STARTED
    print(exit_code)

    output_dict = build_data_table(species, output_dir, args.species, species_type)
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
