Mercurial > repos > sanbi-uwc > build_ctb_gene
view build_ctb_gene.py @ 38:65df0b64db15 draft
planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc.git commit d7d335bdd8e18529d643b7ce55a311db71f957bc
author | sanbi-uwc |
---|---|
date | Wed, 01 Jun 2016 07:37:28 -0400 |
parents | 8f16164019bd |
children | 2a9a2575c8a4 |
line wrap: on
line source
#!/usr/bin/env python
# Galaxy tool wrapper: starts a Neo4j Docker container with the output
# directory mounted at /data, waits for Neo4j to accept TCP connections,
# exports NEO4J_REST_URL and then runs ``build_ctb_gene test <input_file>``.
from __future__ import print_function
import argparse
import os
import sys
import glob
import shlex
import shutil
import datetime
import time
import random
from subprocess import check_call, check_output, CalledProcessError
import socket
try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse
import logging

log = logging.getLogger(__name__)


def inspect_docker(cmd_str):
    """Run ``cmd_str`` through the shell and return its standard output.

    :param cmd_str: complete shell command line (used by ``main`` for
        ``docker inspect``).
    :return: the command's output as text, including any trailing newline,
        or ``None`` when the command exits with a non-zero status.
    """
    try:
        # universal_newlines=True yields text instead of bytes on Python 3,
        # so the result can be formatted straight into a URL.
        return check_output(cmd_str, shell=True, universal_newlines=True)
    except CalledProcessError:
        print("Error running get_docker_port by build_ctb_gene", file=sys.stderr)
        return None


class BuildCtbRunner(object):
    """Drives the Neo4j container and the ``build_ctb_gene`` command."""

    def __init__(self, args=None):
        """Store the parsed options and pick a container name.

        :param args: namespace providing ``outputdir`` and ``input_file``.
        """
        self.args = args
        self.outputdir = args.outputdir
        # Set by docker_run(); host directory mounted into the container.
        self.mount_point = None
        # Random suffix so that several jobs do not share one container name.
        self.docker_instance_name = "build_ctb_gene_" + str(random.randrange(0, 1000, 2))

    def build_ctb_gene(self):
        """Run ``build_ctb_gene test <input_file>``.

        :return: ``True`` on success, ``None`` if the command fails or
            cannot be started.
        """
        # An argument list (rather than a re-split string) keeps an input
        # path containing whitespace as a single argument.
        cmdline = ["build_ctb_gene", "test", str(self.args.input_file)]
        try:
            check_call(cmdline)
        except (CalledProcessError, OSError):
            # OSError: the executable is missing or not runnable.
            print("Error running the build_ctb_gene goterms", file=sys.stderr)
            return None
        print("Building a new DB, current time: %s" % str(datetime.date.today()))
        print("GFF File - Input: %s" % str(self.args.input_file))
        return True

    def newSplit(self, value):
        """Split a command line on whitespace, keeping double-quoted spans whole.

        The lexer runs in non-POSIX mode, so the surrounding double quotes
        stay part of the returned token.  ``#`` is not treated as a comment
        character.

        :param value: command line string.
        :return: list of tokens.
        """
        lex = shlex.shlex(value)
        lex.quotes = '"'
        lex.whitespace_split = True
        lex.commenters = ''
        return list(lex)

    def copy_output_file_to_dataset(self):
        """Copy everything under ``<mount_point>/graph.db`` into the output directory.

        Regular files are copied with their metadata; directories are copied
        recursively under their own base name.  Requires ``docker_run`` to
        have set ``self.mount_point``.
        """
        graph_dir = os.path.join(self.mount_point, "graph.db")
        for entry in glob.glob(os.path.join(graph_dir, '*')):
            if os.path.isfile(entry):
                shutil.copy2(entry, self.args.outputdir)
            elif os.path.isdir(entry):
                # Build the destination explicitly instead of chdir()-ing into
                # the output directory, which changed the cwd for the caller.
                destination = os.path.join(self.args.outputdir, os.path.basename(entry))
                shutil.copytree(entry, destination)

    def docker_stop(self):
        """Stop the container started by ``docker_run``.

        :return: ``True`` on success, ``None`` on failure.
        """
        # The container is created under the randomised instance name, so
        # that is the name that has to be stopped.
        stop_cmd = ["docker", "stop", self.docker_instance_name]
        try:
            check_call(stop_cmd)
        except (CalledProcessError, OSError):
            print("Error running docker stop {}".format(self.docker_instance_name), file=sys.stderr)
            return None
        return True

    def docker_run(self):
        """Start the Neo4j container with the output directory mounted at /data.

        :return: ``True`` when ``docker run`` succeeds, ``None`` otherwise.
        """
        self.mount_point = "{}".format(self.outputdir)
        if not os.path.isdir(self.mount_point):
            try:
                os.makedirs(self.mount_point)
            except os.error as e:
                print("Error creating mount point {mount_point}: {error}".format(
                    mount_point=self.mount_point, error=e.strerror))
        cmd = [
            "docker", "run", "-d", "-P",
            "-v", "{}:/data".format(self.mount_point),
            "-e", "NEO4J_AUTH=none",
            "--name", self.docker_instance_name,
            "thoba/neo4j_galaxy_ie",
        ]
        try:
            check_call(cmd)
        except (CalledProcessError, OSError):
            print("Error running docker run by build_ctb_gene", file=sys.stderr)
            return None
        return True


def _wait_for_port(host, port, timeout):
    """Try to open a TCP connection once per second, at most ``timeout`` times.

    :return: ``True`` as soon as a connection succeeds, ``False`` otherwise.
    """
    while timeout > 0:
        try:
            connection = socket.create_connection((host, port), 1)
        except socket.error:
            timeout -= 1
            time.sleep(1)
        else:
            # Only reachability matters; do not leak the probe socket.
            connection.close()
            return True
    return False


def main():
    """Command line entry point; exits non-zero when any step fails."""
    parser = argparse.ArgumentParser(description="Tool used to extract data about genes using locus_tags")
    parser.add_argument('--outputdir', required=True)
    parser.add_argument('--input_file', required=True)
    args = parser.parse_args()

    ctb_gene_runner = BuildCtbRunner(args)

    # boot up a neo4j docker container
    if ctb_gene_runner.docker_run() is None:
        sys.exit(1)

    # get the host port that docker mapped to the container's 7474/tcp
    cmd_str = "docker inspect --format='{{(index (index .NetworkSettings.Ports \"7474/tcp\") 0).HostPort}}' %s" % ctb_gene_runner.docker_instance_name

    # TODO: randomise the ports/names/mount_point and use the auto kill image
    neo4j_container_info = inspect_docker(cmd_str)
    if neo4j_container_info is None:
        sys.exit(1)

    neo4j_port = neo4j_container_info.strip()
    if not neo4j_port.isdigit():
        sys.exit('unexpected port reported by docker inspect: {!r}'.format(neo4j_port))
    neo4j_url = 'http://localhost:{}/db/data/'.format(neo4j_port)
    try:
        os.environ["NEO4J_REST_URL"] = neo4j_url
    except (OSError, ValueError) as e:
        print("Error setting the NEO4J db environmental values", e)

    # make the output directory
    if not os.path.exists(args.outputdir):
        os.makedirs(args.outputdir)

    url = urlparse(neo4j_url)
    # seconds to wait for neo4j to start accepting connections
    timeout = int(os.environ.get('NEO4J_WAIT_TIMEOUT', 30))
    if not _wait_for_port(url.hostname, url.port, timeout):
        sys.exit('timed out trying to connect to {}'.format(neo4j_url))

    status = ctb_gene_runner.build_ctb_gene()
    if status is None:
        sys.exit(1)


if __name__ == "__main__":
    main()