comparison data_manager/fetch_vep_cache_data.py @ 2:17c98d091710 draft

Uploaded
author dvanzessen
date Mon, 15 Jul 2019 05:19:31 -0400
parents
children
comparison
equal deleted inserted replaced
1:268244627bf2 2:17c98d091710
1 import argparse
2 import os
3 import json
4 import re
5 import pprint
6 import subprocess
7 import sys
8
def resolve_species(species, species_type):
    """Translate the requested species into VEP's species name and assembly.

    Args:
        species: Species identifier as passed on the command line. Human
            identifiers start with "homo_sapiens" and must end in "37" or
            "38" to select the assembly.
        species_type: One of "ensembl", "refseq" or "merged".

    Returns:
        A ``(vep_species, assembly)`` tuple. ``assembly`` is "GRCh37" or
        "GRCh38" for human, and ``None`` for every other species.

    Raises:
        ValueError: If a human identifier does not end in "37" or "38".
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        # The assembly is passed separately, so strip it from the name.
        species = "homo_sapiens"

    # RefSeq and merged caches are requested with a suffixed species name.
    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)

    return species, assembly


def build_install_command(cache_dir, species, assembly=None):
    """Build the ``vep_install`` argument list.

    The command is returned as a list so it can be executed without a shell;
    a cache directory containing spaces or shell metacharacters is then
    passed through verbatim as a single argument.

    Args:
        cache_dir: Directory handed to ``--CACHEDIR``.
        species: Species name handed to ``--SPECIES``.
        assembly: Optional assembly name; ``--ASSEMBLY`` is only added when
            this is truthy.

    Returns:
        The command as a list of strings.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", cache_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def main(argv=None):
    """Run ``vep_install`` and write the data table entry as JSON.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        The exit code reported by ``subprocess.call`` for ``vep_install``.
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # All three are dereferenced unconditionally below, so require them and
    # let argparse report a usage error instead of failing later on None.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as error:
        print(str(error))
        sys.exit(1)

    # The output file is read before it is overwritten below; its parsed
    # content is only echoed to the log.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    vep_install_cmd = build_install_command(output_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    # Argument list + no shell: nothing in the path or species name is
    # interpreted by a shell.
    exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)

    print(exit_code)

    output_dict = dict(
        data_tables=dict(
            vep_cache_data=[{
                "value": species,
                "path": output_dir,
                "dbkey": args.species,
                "type": species_type,
                "name": "{0} ({1})".format(args.species, species_type)
            }]
        )
    )
    # The entry is written regardless of the install result; the exit code
    # returned to the caller carries the success/failure signal.
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())