Mercurial > repos > sanbi-uwc > build_ctb_gene
view build_ctb_gene.py @ 49:74ad74a4be56 draft default tip
planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc.git commit 11bbdc95f9fe5a9f85c3e71c35acfd874c0ab9be
author | sanbi-uwc |
---|---|
date | Thu, 07 Jul 2016 09:34:47 -0400 |
parents | 515fdc4654a5 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""Galaxy tool wrapper that runs ``goget goterms`` against a throw-away Neo4j
docker container.

The script starts a ``thoba/neo4j_galaxy_ie`` container with the Galaxy output
directory mounted at ``/data``, waits for the published Neo4j HTTP port to
accept connections, exports ``NEO4J_REST_URL`` and then invokes
``goget goterms`` with the input file and the human-interactions file.
"""
from __future__ import print_function

import argparse
import datetime
import glob
import logging
import os
import random
import shlex
import shutil
import socket
import sys
import time
from subprocess import check_call, check_output, CalledProcessError, Popen, STDOUT, PIPE

try:
    from urllib.parse import urlparse
except ImportError:
    from urlparse import urlparse

log = logging.getLogger(__name__)


def _to_text(value):
    """Return *value* as native text, decoding Python 3 ``bytes`` as UTF-8.

    On Python 2 ``bytes`` is ``str`` and the value is returned unchanged.
    """
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode("utf-8", "replace")
    return value


def inspect_docker(cmd_str):
    """Run a ``docker inspect`` shell command and return its raw stdout.

    :param cmd_str: complete shell command line (executed with ``shell=True``
        because the ``--format`` template relies on shell quoting).
    :return: the command output exactly as ``check_output`` returns it
        (``bytes`` on Python 3), or ``None`` if the command exits non-zero.
    """
    try:
        return check_output(cmd_str, shell=True)
    except CalledProcessError:
        print("Error running get_docker_port by build_ctb_gene", file=sys.stderr)
        return None


def parse_docker_port(raw_output):
    """Extract the host port from the output of :func:`inspect_docker`.

    ``check_output`` yields bytes on Python 3; formatting those directly into
    a URL would produce ``b'32768'``, so the value is decoded first and
    surrounding whitespace (the trailing newline) is stripped.
    """
    return _to_text(raw_output).strip()


def split_host_port(neo4j_url):
    """Return ``(host, port)`` for *neo4j_url*, ignoring any ``user:pw@`` part.

    The port is returned as an ``int`` (``None`` when the URL has no port).
    """
    parsed = urlparse(neo4j_url)
    return parsed.hostname, parsed.port


def wait_for_port(host, port, timeout):
    """Poll ``host:port`` until a TCP connection succeeds.

    One attempt is made per remaining unit of *timeout*; each failed attempt
    is followed by a one second sleep.  The probe socket is closed as soon as
    the connection succeeds.

    :return: ``True`` once connected, ``False`` if all attempts failed
        (or *timeout* was not positive).
    """
    remaining = timeout
    while remaining > 0:
        try:
            probe = socket.create_connection((host, port), 1)
        except socket.error:
            remaining -= 1
            time.sleep(1)
        else:
            probe.close()
            return True
    return False


class BuildCtbRunner(object):
    """Drives the Neo4j docker container and the ``goget goterms`` build."""

    def __init__(self, args=None):
        """
        Initializes an object to run CtbRunner in Galaxy.

        :param args: parsed command line namespace providing ``outputdir``,
            ``input_file`` and ``human_interactions``.  Passing ``None``
            raises ``AttributeError`` because ``outputdir`` is read here.
        """
        self.args = args
        self.outputdir = args.outputdir
        self.mount_point = None
        # Even random suffix in [0, 998] so parallel jobs rarely share a name.
        self.docker_instance_name = "build_ctb_gene_" + str(random.randrange(0, 1000, 2))
        self.neo4j_proc = None

    def build_ctb_gene(self):
        """Run ``goget goterms <input_file> <human_interactions>``.

        The argument vector is built directly rather than by re-splitting a
        formatted string, so paths containing whitespace stay intact.

        :return: ``True`` on success, ``None`` if the command exits non-zero.
        """
        cmdline = [
            "goget",
            "goterms",
            str(self.args.input_file),
            str(self.args.human_interactions),
        ]
        try:
            check_call(cmdline)
        except CalledProcessError:
            print("Error running the build_ctb_gene goterms", file=sys.stderr)
            return None
        print("Building a new DB, current time: %s" % str(datetime.date.today()))
        print("GFF File - Input: %s" % str(self.args.input_file))
        return True

    def newSplit(self, value):
        """Split *value* on whitespace, keeping double-quoted runs together.

        Uses a non-POSIX ``shlex`` lexer: only ``"`` is a quote character, the
        quotes remain part of the returned token, and ``#`` does not start a
        comment.
        """
        lex = shlex.shlex(value)
        lex.quotes = '"'
        lex.whitespace_split = True
        lex.commenters = ''
        return list(lex)

    def copy_output_file_to_dataset(self):
        """
        Copy everything under ``<mount_point>/graph.db`` into the Galaxy
        output directory (``self.args.outputdir``).

        Regular files are copied with ``shutil.copy2``; directories are copied
        recursively to a same-named directory inside the output directory.
        The destination is built explicitly instead of ``os.chdir``-ing, so
        the process working directory is left untouched and relative source
        paths keep resolving.
        """
        source_dir = os.path.join(self.mount_point, "graph.db")
        for path in glob.glob(os.path.join(source_dir, '*')):
            if os.path.isfile(path):
                shutil.copy2(path, self.args.outputdir)
            elif os.path.isdir(path):
                target = os.path.join(self.args.outputdir, os.path.basename(path))
                shutil.copytree(path, target)

    def docker_stop(self):
        """Run ``docker stop`` on this runner's container.

        The container is started under ``self.docker_instance_name``, so that
        name (not a fixed literal) is the one that has to be stopped.

        :return: ``True`` on success, ``None`` if the command exits non-zero.
        """
        stop_cmd = ["docker", "stop", self.docker_instance_name]
        try:
            check_call(stop_cmd)
        except CalledProcessError:
            print("Error running docker stop {}".format(self.docker_instance_name),
                  file=sys.stderr)
            return None
        return True

    def get_docker_output(self):
        """Wait for the ``docker run`` process and return its output.

        ``communicate()`` is used on its own: calling ``wait()`` first on a
        process whose stdout is a pipe can block forever once the pipe buffer
        fills up.

        :return: ``'Return code: <rc>\\n<output>'`` or ``None`` when no
            process has been started.
        """
        if self.neo4j_proc is None:
            return None
        (output, _) = self.neo4j_proc.communicate()
        return 'Return code: {rc}\n{output}'.format(
            rc=self.neo4j_proc.returncode, output=_to_text(output))

    def shutdown_docker(self):
        """Send a terminate signal to the ``docker run`` process, if any."""
        if self.neo4j_proc is None:
            return
        try:
            self.neo4j_proc.terminate()
        except OSError:
            # The process has already gone away; nothing left to terminate.
            pass

    def docker_run(self, startup_wait=30):
        """Start the Neo4j container with the output directory mounted at /data.

        :param startup_wait: seconds to sleep before checking that the
            ``docker run`` process is still alive (default 30).
        :raises CalledProcessError: if ``docker run`` has already exited after
            the start-up wait; the exception carries the real return code,
            the command and the captured output.
        """
        self.mount_point = "{}".format(self.outputdir)
        if not os.path.isdir(self.mount_point):
            try:
                os.makedirs(self.mount_point)
            except os.error as e:
                # Best effort, as before: report and let docker surface the
                # real problem.  Printed to stdout, like the original.
                print("Error creating mount point {mount_point}: {error}".format(
                    mount_point=self.mount_point, error=e.strerror))
        cmd = [
            "docker", "run", "--rm", "-P",
            "-v", "{}:/data".format(self.mount_point),
            "-e", "NEO4J_UID={}".format(os.getuid()),
            "-e", "NEO4J_GID={}".format(os.getgid()),
            "-e", "NEO4J_AUTH=none",
            "-e", "NEO4J_MONITOR_TRAFFIC=false",
            "--name", self.docker_instance_name,
            "thoba/neo4j_galaxy_ie:latest",
        ]
        self.neo4j_proc = Popen(cmd, stdout=PIPE, stderr=STDOUT)
        time.sleep(startup_wait)  # give the container time to wake up
        return_code = self.neo4j_proc.poll()
        if return_code is not None:
            details = self.get_docker_output()
            print("Error running docker run by build_ctb_gene:\n{}".format(details),
                  file=sys.stderr)
            raise CalledProcessError(return_code, cmd, output=details)


def main():
    """Command line entry point.

    Starts the container, discovers the host port published for
    ``7474/tcp``, waits for it to accept connections, runs the build and
    always shuts the container process down again, including on early exits.
    Exits with status 1 if the port lookup or the build fails.
    """
    parser = argparse.ArgumentParser(description="Tool used to extract data about genes using locus_tags")
    parser.add_argument('--outputdir')
    parser.add_argument('--input_file')
    parser.add_argument('--human_interactions')
    args = parser.parse_args()

    ctb_gene_runner = BuildCtbRunner(args)

    # boot up a neo4j docker container
    ctb_gene_runner.docker_run()

    try:
        # get the port of the docker container
        cmd_str = "docker inspect --format='{{(index (index .NetworkSettings.Ports \"7474/tcp\") 0).HostPort}}' %s" % ctb_gene_runner.docker_instance_name
        # TODO: randomise the ports/names/mount_point and use the auto kill image
        neo4j_container_info = inspect_docker(cmd_str)
        if neo4j_container_info is None:
            sys.exit(1)

        neo4j_port = parse_docker_port(neo4j_container_info)
        neo4j_url = 'http://localhost:{}/db/data/'.format(neo4j_port)
        try:
            os.environ["NEO4J_REST_URL"] = neo4j_url
        except (OSError, ValueError) as e:
            print("Error setting the NEO4J db environmental values", e)

        # make the output directory
        if not os.path.exists(args.outputdir):
            os.makedirs(args.outputdir)

        (host, port) = split_host_port(neo4j_url)
        # time to wait till neo4j
        timeout = int(os.environ.get('NEO4J_WAIT_TIMEOUT', 30))
        if not wait_for_port(host, port, timeout):
            sys.exit('timed out trying to connect to {}'.format(neo4j_url))

        status = ctb_gene_runner.build_ctb_gene()
    finally:
        # Runs on success, on sys.exit() and on unexpected errors alike, so
        # the container is never left behind.
        ctb_gene_runner.shutdown_docker()

    if status is None:
        sys.exit(1)


if __name__ == "__main__":
    main()