comparison data_manager/data_manager.py @ 0:078e803565cf draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/blob/master/data_managers/data_manager_fetch_ncbi_taxonomy/ commit 86cf90107482cab1cb47fc0d42d6705f8077daa7
| field | value |
|---|---|
| author | devteam |
| date | Fri, 06 Nov 2015 14:16:28 -0500 |
| parents | |
| children | 8dd4076813ef |
| parent revision | this revision |
|---|---|
| -1:000000000000 | 0:078e803565cf |
Because the parent is the null revision, the comparison adds the whole file. The first half of the new `data_manager/data_manager.py` sets up the imports, argument parsing, and the download helper:

```python
import argparse
import datetime
import json
import os
import shutil
import sys
import tarfile
import urllib2
import zipfile

parser = argparse.ArgumentParser(description='Create data manager json.')
parser.add_argument('--out', dest='output', action='store', help='JSON filename')
parser.add_argument('--name', dest='name', action='store', default=str(datetime.date.today()), help='Data table entry unique ID')
parser.add_argument('--url', dest='url', action='store', default='ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz', help='Download URL')

args = parser.parse_args()

def url_download(url, workdir):
    file_path = os.path.join(workdir, 'download.dat')
    if not os.path.exists(workdir):
        os.makedirs(workdir)
    src = None
    dst = None
    try:
        req = urllib2.Request(url)
        src = urllib2.urlopen(req)
        dst = open(file_path, 'wb')
        while True:
            chunk = src.read(2**10)
            if chunk:
                dst.write(chunk)
            else:
                break
    except Exception, e:
        print >>sys.stderr, str(e)
    finally:
        if src:
            src.close()
        if dst:
            dst.close()
    if tarfile.is_tarfile(file_path):
        fh = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        fh = zipfile.ZipFile(file_path, 'r')
    else:
        return
    fh.extractall(workdir)
    os.remove(file_path)
```
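`url_download` streams the download to `workdir/download.dat` in 1 KiB chunks, then unpacks it in place if it turns out to be a tar or zip archive and deletes the raw download. The code is Python 2 only (`urllib2`, `except Exception, e:`, `print >>sys.stderr`). As a point of comparison, and not part of this commit, a minimal Python 3 sketch of the same logic might look like this, swapping `urllib2` for `urllib.request` and using context managers instead of the manual `finally` cleanup; the names mirror the original but the rewrite is purely illustrative:

```python
import os
import shutil
import tarfile
import urllib.request
import zipfile


def url_download(url, workdir):
    """Fetch `url` into `workdir` and unpack it if it is a tar or zip archive."""
    file_path = os.path.join(workdir, 'download.dat')
    os.makedirs(workdir, exist_ok=True)
    # Stream the response to disk instead of holding it in memory.
    with urllib.request.urlopen(url) as src, open(file_path, 'wb') as dst:
        shutil.copyfileobj(src, dst)
    # Unpack recognised archive formats in place, then drop the raw download.
    if tarfile.is_tarfile(file_path):
        with tarfile.open(file_path, 'r:*') as fh:
            fh.extractall(workdir)
    elif zipfile.is_zipfile(file_path):
        with zipfile.ZipFile(file_path, 'r') as fh:
            fh.extractall(workdir)
    else:
        return
    os.remove(file_path)
```

One behavioural difference worth noting: the original swallows download errors (printing them to stderr) and still goes on to probe whatever partial file was written, whereas the sketch lets the exception propagate.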
The remainder of the file defines `main` and the entry point:

```python
def main(args):
    workdir = os.path.join(os.getcwd(), 'taxonomy')
    url_download(args.url, workdir)
    data_manager_entry = {}
    data_manager_entry['value'] = args.name.lower()
    data_manager_entry['name'] = args.name
    data_manager_entry['path'] = '.'
    data_manager_json = dict(data_tables=dict(ncbi_taxonomy=data_manager_entry))
    params = json.loads(open(args.output).read())
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir(target_directory)
    output_path = os.path.abspath(os.path.join(os.getcwd(), 'taxonomy'))
    for filename in os.listdir(workdir):
        shutil.move(os.path.join(output_path, filename), target_directory)
    file(args.output, 'w').write(json.dumps(data_manager_json))


if __name__ == '__main__':
    main(args)
```
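Galaxy's data manager framework invokes the script with `--out` pointing at a JSON params file it has already written. `main` reads `output_data[0].extra_files_path` from that file, downloads and unpacks the taxonomy dump into a local `taxonomy/` working directory, moves the extracted files (`names.dmp`, `nodes.dmp`, and so on) into the `extra_files_path` directory, and finally overwrites the params file with the new `ncbi_taxonomy` data table entry. If run with, say, `--name 2015-11-06`, the JSON written back would look roughly like the example below; the input file's paths depend on the Galaxy instance and are not shown here:

```json
{
  "data_tables": {
    "ncbi_taxonomy": {
      "value": "2015-11-06",
      "name": "2015-11-06",
      "path": "."
    }
  }
}
```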
