"""Download/process a VEP cache with ``vep_install`` and record the result.

The script reads a JSON parameter file, runs ``vep_install`` for the requested
species into the given cache directory, and then overwrites the parameter file
with a JSON document holding one ``vep_cache_data`` data-table entry.

NOTE(review): the ``data_tables`` layout looks like a Galaxy data-manager
result file -- confirm against the tool wrapper that invokes this script.
"""
import argparse
import os
import json
import re
import pprint
import subprocess
import sys


def _resolve_species(species, species_type):
    """Return ``(cache_species, assembly)`` for the requested species.

    Args:
        species: value of ``--species``. Names starting with ``homo_sapiens``
            must end in ``37`` or ``38``; the suffix selects the assembly and
            the name is normalised to plain ``homo_sapiens``.
        species_type: ``"ensembl"``, ``"refseq"`` or ``"merged"``. The latter
            two are appended to the species name as ``_<type>``.

    Returns:
        A tuple of the species name to pass to ``--SPECIES`` and the assembly
        name (``"GRCh37"``/``"GRCh38"``), or ``None`` when no ``--ASSEMBLY``
        option should be passed.

    Raises:
        ValueError: for a ``homo_sapiens`` name without a 37/38 suffix.
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)
    return species, assembly


def _build_install_command(output_dir, species, assembly):
    """Return the ``vep_install`` invocation as an argument list."""
    command = [
        "vep_install", "--NO_HTSLIB", "-a", "alcf",
        "--CACHEDIR", output_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def _ensure_directory(path):
    """Create *path* (and parents) unless it already exists as a directory."""
    try:
        os.makedirs(path)
    except OSError:
        # Tolerate "already exists" (including a concurrent creator), but
        # re-raise anything that left us without a usable directory.
        if not os.path.isdir(path):
            raise


def main(argv=None):
    """Run the cache installation and write the data-table JSON.

    Args:
        argv: command-line arguments without the program name; ``None`` makes
            argparse fall back to ``sys.argv[1:]``.

    Returns:
        The process exit status: ``1`` for an unrecognised human assembly,
        ``127`` when ``vep_install`` could not be started, otherwise the exit
        code returned by ``vep_install``.
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # These three are dereferenced unconditionally below, so let argparse
    # report a missing one instead of failing later with a traceback.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = _resolve_species(args.species, species_type)
    except ValueError as err:
        print(err)
        return 1

    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    _ensure_directory(output_dir)

    vep_install_cmd = _build_install_command(output_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    try:
        # Argument list + no shell: paths or species names containing spaces
        # or shell metacharacters are passed through verbatim.
        exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)
    except OSError as err:
        # Without a shell a missing/non-executable vep_install raises instead
        # of returning a status; report it with the status a shell would use.
        sys.stderr.write("Could not run vep_install: {0}\n".format(err))
        exit_code = 127

    print(exit_code)

    output_dict = {
        "data_tables": {
            "vep_cache_data": [{
                "value": species,
                "path": output_dir,
                "dbkey": args.species,
                "type": species_type,
                "name": "{0} ({1})".format(args.species, species_type),
            }],
        },
    }
    # The entry is written even when the installer failed; the non-zero exit
    # status returned below is what signals the failure to the caller.
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())