Mercurial > repos > dvanzessen > vep_emc
diff data_manager/fetch_vep_cache_data.py @ 2:17c98d091710 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:19:31 -0400 |
| parents | |
| children | |
line wrap: on
line diff
# Reconstructed from the diff:
#   --- /dev/null                              Thu Jan 01 00:00:00 1970 +0000
#   +++ b/data_manager/fetch_vep_cache_data.py Mon Jul 15 05:19:31 2019 -0400
#   @@ -0,0 +1,70 @@
"""Use VEP INSTALL.pl (``vep_install``) to download/process a cache.

The script reads a JSON parameter document from ``--output-file``, runs
``vep_install`` for the requested species inside ``--output-dir``, and then
overwrites ``--output-file`` with a JSON document describing one
``vep_cache_data`` data-table entry.  The process exit status is the exit
status of ``vep_install``.

NOTE(review): the ``data_tables`` layout looks like the Galaxy data-manager
output format -- confirm against the tool wrapper that calls this script.
"""
import argparse
import os
import json
import re
import pprint
import subprocess
import sys


def resolve_species(species, species_type):
    """Translate the requested species into what ``vep_install`` expects.

    Args:
        species: Species identifier as given on the command line.  Identifiers
            starting with ``homo_sapiens`` must end in ``37`` or ``38``; that
            suffix selects the assembly and is stripped from the name.
        species_type: One of ``ensembl``, ``refseq`` or ``merged``.  For the
            latter two the type is appended to the species name.

    Returns:
        A ``(species_name, assembly)`` tuple.  ``assembly`` is ``"GRCh37"``,
        ``"GRCh38"`` or ``None`` when no assembly has to be passed.

    Raises:
        ValueError: for a human identifier with an unrecognised assembly.
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)

    return species, assembly


def build_install_command(output_dir, species, assembly=None):
    """Return the ``vep_install`` invocation as an argument list.

    An argument list (instead of one shell string) keeps paths or species
    names containing spaces or shell metacharacters intact.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", output_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def build_data_table(species, output_dir, dbkey, species_type):
    """Return the ``data_tables`` document describing the installed cache."""
    return dict(
        data_tables=dict(
            vep_cache_data=[{
                "value": species,
                "path": output_dir,
                "dbkey": dbkey,
                "type": species_type,
                "name": "{0} ({1})".format(dbkey, species_type),
            }]
        )
    )


def main(argv=None):
    """Run the cache installation and return the process exit status.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        ``1`` for an unknown human assembly, otherwise the exit status of
        ``vep_install`` (``127`` if the executable could not be started).
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # All three are dereferenced unconditionally below, so let argparse report
    # a missing one instead of failing later with an AttributeError/TypeError.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as err:
        print(err)
        return 1

    # The output file initially holds the job parameters as JSON; they are
    # only echoed for logging purposes.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    vep_install_cmd = build_install_command(output_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    try:
        exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)
    except OSError as err:
        # Without a shell a missing executable raises instead of returning;
        # report it with the shell's conventional "command not found" status.
        print("Could not run vep_install: {0}".format(err))
        exit_code = 127

    print(exit_code)

    output_dict = build_data_table(species, output_dir, args.species, species_type)
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))

    return exit_code


if __name__ == "__main__":
    sys.exit(main())
