Mercurial > repos > chrisw > data_manager_monorail_index_fetcher
view data_manager_monorail_index/data_manager/data_manager.py @ 20:166451f0ac6a draft
Uploaded
author | chrisw |
---|---|
date | Thu, 14 Nov 2019 02:28:00 +0000 |
parents | e7d440a3b439 |
children | 179527791fab |
line wrap: on
line source
#based off of "Gene Annotation Fetcher" https://testtoolshed.g2.bx.psu.edu/repository?repository_id=c0f5466df187cc04&changeset_revision=444300ec9185
"""Create the data manager JSON entry for a Monorail index.

The script reads the JSON file named by ``--config``, then overwrites that same
file with a ``data_tables`` document containing one ``monorail_index`` entry
derived from ``--url`` and ``--subdir``.
"""
import argparse
import datetime
import json
import os
import shutil
import sys
import tarfile
import zipfile

try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2 module; bind the closest equivalent to the same
    # name so the module still imports there. Nothing below uses it.
    import urllib.request as urllib2

parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--config', dest='config', action='store', help='JSON filename')
#parser.add_argument('--name', dest='name', action='store', default=None, help='Data table entry unique ID')
parser.add_argument('--url', dest='url', action='store', help='Monorail Indexes Base URL', default="http://snaptron.cs.jhu.edu/data/monorail/1.0.0/ath10")
parser.add_argument('--subdir', dest='subdir', action='store', help='subdirectory where the indexes are temporarily downloaded to')
#parser.add_argument('--version', dest='version', action='store', help='Version of Monorail as a whole (not a particular index/program)')


def _split_version_and_ref(url):
    """Return ``(version, ref)`` taken from the last two components of *url*.

    URL syntax assumes that the last two components of the URL are:
    <version>/<ref>, e.g. 1.0.0/ath10

    Trailing slashes are ignored so that ``.../1.0.0/ath10/`` yields the same
    result as ``.../1.0.0/ath10`` instead of an empty reference name.

    Raises:
        ValueError: if *url* has fewer than two ``/``-separated components.
    """
    url_comps = url.rstrip('/').split('/')
    if len(url_comps) < 2:
        raise ValueError(
            "URL must end in <version>/<ref> (e.g. 1.0.0/ath10), got: %r" % (url,))
    return url_comps[-2], url_comps[-1]


def main(args):
    """Overwrite ``args.config`` with the ``monorail_index`` data table entry.

    Args:
        args: object with ``config`` (path of the JSON file to read and then
            overwrite), ``url`` (index base URL ending in <version>/<ref>)
            and ``subdir`` (stored as both ``path`` and ``exons_path``).

    Raises:
        ValueError: if ``args.url`` does not end in <version>/<ref>, or if the
            existing config file does not contain valid JSON.
        IOError/OSError: if the config file cannot be read or written.
    """
    version, ref = _split_version_and_ref(args.url)

    # The incoming parameters are not used for the entry, but parsing them
    # keeps the original behaviour of failing on a missing or malformed config
    # before anything is overwritten.
    with open(args.config) as config_in:
        json.load(config_in)

    data_manager_entry = {}
    data_manager_entry['dbkey'] = ref.lower()
    data_manager_entry['path'] = args.subdir
    data_manager_entry['exons_path'] = args.subdir
    data_manager_entry['version'] = version
    #data_manager_entry['exons_path'] = data_manager_entry['path'] + os.sep + 'gtf' + os.sep + 'exons.bed'

    data_manager_json = {'data_tables': {'monorail_index': [data_manager_entry]}}
    # open() + with replaces the Python 2-only file() builtin and guarantees
    # the output is flushed and closed.
    with open(args.config, 'w') as config_out:
        config_out.write(json.dumps(data_manager_json))


if __name__ == '__main__':
    # Parse the command line only when run as a script so that importing this
    # module has no side effects on sys.argv handling.
    args = parser.parse_args()
    main(args)