| 2 | 1 import argparse | 
|  | 2 import os | 
|  | 3 import json | 
|  | 4 import re | 
|  | 5 import pprint | 
|  | 6 import subprocess | 
|  | 7 import sys | 
|  | 8 | 
def resolve_species(species, species_type):
    """Map the CLI species/type onto the values handed to ``vep_install``.

    Human inputs are expected to start with ``homo_sapiens`` and end with
    ``37`` or ``38``; they are collapsed to the plain ``homo_sapiens`` species
    plus an explicit assembly.  Every other species is passed through with no
    assembly.  For the ``refseq`` and ``merged`` cache types the type is
    appended to the species name as ``<species>_<type>``.

    Args:
        species: Species identifier as given on the command line.
        species_type: One of ``ensembl``, ``refseq`` or ``merged``.

    Returns:
        A ``(species, assembly)`` tuple; ``assembly`` is ``"GRCh37"``,
        ``"GRCh38"`` or ``None`` when no ``--ASSEMBLY`` flag is needed.

    Raises:
        ValueError: If a human species does not end in ``37`` or ``38``.
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)

    return species, assembly


def build_install_command(cache_dir, species, assembly=None):
    """Return the ``vep_install`` invocation as an argument list.

    A list (rather than a single shell string) keeps every value as exactly
    one argument, so paths or species containing whitespace or shell
    metacharacters are neither split nor interpreted.

    Args:
        cache_dir: Directory passed to ``--CACHEDIR``.
        species: Species name passed to ``--SPECIES``.
        assembly: Optional assembly name; ``--ASSEMBLY`` is only added when
            this is set.

    Returns:
        The command as a list of strings.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", cache_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def build_data_table(value, path, dbkey, species_type):
    """Build the JSON-serialisable ``vep_cache_data`` data table payload.

    Args:
        value: Resolved species name used as the entry's ``value``.
        path: Cache directory recorded as the entry's ``path``.
        dbkey: Species exactly as given on the command line.
        species_type: Cache type (``ensembl``, ``refseq`` or ``merged``).

    Returns:
        A dict with a single ``vep_cache_data`` row under ``data_tables``.
    """
    return {
        "data_tables": {
            "vep_cache_data": [{
                "value": value,
                "path": path,
                "dbkey": dbkey,
                "type": species_type,
                "name": "{0} ({1})".format(dbkey, species_type),
            }]
        }
    }


def main(argv=None):
    """Run ``vep_install`` for one species and write the data table JSON.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The process exit code: ``1`` for an unknown human assembly,
        otherwise the exit code of ``vep_install``.
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # All three are dereferenced unconditionally below, so omitting one used
    # to end in an AttributeError/TypeError; let argparse report it instead.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as err:
        print(err)
        return 1

    # The output file initially holds JSON parameters; they are only echoed
    # for the log before the file is overwritten with the data table below.
    with open(output_file) as output_file_handle:
        params = json.load(output_file_handle)

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # The child runs with cwd set to the cache directory, so a relative
    # --CACHEDIR would be resolved inside it (output_dir/output_dir) and no
    # longer match the path recorded in the data table.  Pass it absolute.
    cache_dir = os.path.abspath(output_dir)
    vep_install_cmd = build_install_command(cache_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    # Argument list and no shell: nothing in the path/species is re-parsed.
    exit_code = subprocess.call(vep_install_cmd, cwd=cache_dir)

    print(exit_code)

    output_dict = build_data_table(species, output_dir, args.species, species_type)
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())