Mercurial > repos > dvanzessen > vep_emc
comparison data_manager/fetch_vep_cache_data.py @ 2:17c98d091710 draft
Uploaded
| author | dvanzessen |
|---|---|
| date | Mon, 15 Jul 2019 05:19:31 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 1:268244627bf2 | 2:17c98d091710 |
|---|---|
"""Galaxy data manager: download a VEP cache with ``vep_install``.

The script reads the JSON parameter file handed over via ``--output-file``,
runs ``vep_install`` for the requested species inside ``--output-dir`` and,
on success, overwrites ``--output-file`` with a ``vep_cache_data`` data table
entry describing the downloaded cache.
"""
import argparse
import os
import json
import re
import pprint
import subprocess
import sys


def resolve_species(species, species_type):
    """Translate the requested species into ``vep_install`` terms.

    Human species names carry the assembly as a suffix (for example a name
    ending in ``37`` or ``38``); that suffix is turned into an explicit
    assembly name and the species collapses to ``homo_sapiens``.  For the
    ``refseq`` and ``merged`` cache types the type is appended to the
    species name.

    Args:
        species: Species name as passed on the command line.
        species_type: One of ``ensembl``, ``refseq`` or ``merged``.

    Returns:
        A ``(species, assembly)`` tuple; ``assembly`` is ``None`` when no
        explicit assembly has to be passed to ``vep_install``.

    Raises:
        ValueError: If a human species name does not end in ``37`` or ``38``.
    """
    assembly = None
    if species.startswith("homo_sapiens"):
        if species.endswith("37"):
            assembly = "GRCh37"
        elif species.endswith("38"):
            assembly = "GRCh38"
        else:
            raise ValueError("Unknown human assembly")
        species = "homo_sapiens"

    if species_type in ("refseq", "merged"):
        species = "{0}_{1}".format(species, species_type)

    return species, assembly


def build_install_command(output_dir, species, assembly=None):
    """Return the ``vep_install`` invocation as an argument list.

    An argument list (instead of a shell string) keeps paths or species
    names containing spaces or shell metacharacters from being re-parsed
    by a shell.
    """
    command = [
        "vep_install",
        "--NO_HTSLIB",
        "-a", "alcf",
        "--CACHEDIR", output_dir,
        "--SPECIES", species,
    ]
    if assembly:
        command.extend(["--ASSEMBLY", assembly])
    return command


def build_data_table_entry(requested_species, species, species_type, output_dir):
    """Build the JSON-serialisable ``vep_cache_data`` data table payload.

    ``requested_species`` is the unmodified command line value (used as
    dbkey and in the display name); ``species`` is the resolved name that
    was passed to ``vep_install``.
    """
    return dict(
        data_tables=dict(
            vep_cache_data=[{
                "value": species,
                "path": output_dir,
                "dbkey": requested_species,
                "type": species_type,
                "name": "{0} ({1})".format(requested_species, species_type),
            }]
        )
    )


def main(argv=None):
    """Run the data manager and return the process exit status.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        ``0`` on success, ``1`` for an unknown human assembly, otherwise the
        exit status reported for ``vep_install``.
    """
    parser = argparse.ArgumentParser(description="Use VEP INSTALL.pl to download/process the cache for an assembly")
    # All three options are needed further down; marking them required turns
    # a later AttributeError/TypeError into a proper usage message.
    parser.add_argument("--output-file", required=True)
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--species", required=True)
    parser.add_argument("--species-type", choices=["ensembl", "refseq", "merged"], default="ensembl")
    args = parser.parse_args(argv)

    output_file = args.output_file
    output_dir = args.output_dir
    species_type = args.species_type

    try:
        species, assembly = resolve_species(args.species, species_type)
    except ValueError as error:
        sys.stderr.write("{0}\n".format(error))
        return 1

    # The output file initially holds the job parameters as JSON; they are
    # only echoed to the job log.
    with open(output_file) as output_file_handle:
        params = json.loads(output_file_handle.read())

    print(output_file)
    print(output_dir)
    print(species)
    print(species_type)
    pprint.pprint(params)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    vep_install_cmd = build_install_command(output_dir, species, assembly)

    print("Running INSTALL.pl")
    print(" ".join(vep_install_cmd))
    exit_code = subprocess.call(vep_install_cmd, cwd=output_dir)

    print(exit_code)

    if exit_code != 0:
        # Do not describe a cache that was not installed; just propagate
        # the failure status.
        sys.stderr.write("vep_install failed with exit code {0}\n".format(exit_code))
        return exit_code

    output_dict = build_data_table_entry(args.species, species, species_type, output_dir)
    with open(output_file, 'w') as output_file_handle:
        output_file_handle.write(json.dumps(output_dict))
    return exit_code


if __name__ == "__main__":
    sys.exit(main())
