Mercurial > repos > sanbi-uwc > build_ctb_gene
view build_ctb_gene.py @ 32:9e33d452ad8c draft
planemo upload for repository https://github.com/SANBI-SA/tools-sanbi-uwc.git commit cca5de2166bbc2fbbab368415853e085d474b6b2
author | sanbi-uwc |
---|---|
date | Tue, 24 May 2016 04:19:03 -0400 |
parents | bcf4f741b94f |
children | f9ad7d3cd333 |
line wrap: on
line source
#!/usr/bin/env python
"""Galaxy tool wrapper that builds a Neo4j database with ``build_ctb_gene``.

The script starts a ``thoba/neo4j_galaxy_ie`` docker container with a data
directory under the current working directory mounted at ``/data``, looks up
the host port published for ``7474/tcp``, exports the resulting REST URL as
``NEO4J_REST_URL``, runs ``build_ctb_gene goterms <input_file>`` and finally
copies the contents of the container's ``graph.db`` directory into the
requested output directory.
"""
from __future__ import print_function

import argparse
import datetime
import glob
import logging
import os
import random
import shlex
import shutil
import sys
import time
from subprocess import CalledProcessError, check_call, check_output

log = logging.getLogger(__name__)


def inspect_docker(cmd_str):
    """Run *cmd_str* through the shell and return what it printed on stdout.

    Returns ``None`` (after reporting on stderr) when the command exits with
    a non-zero status.
    """
    try:
        # universal_newlines=True makes Python 3 return text instead of bytes;
        # on Python 2 the result is a plain ``str`` either way.
        return check_output(cmd_str, shell=True, universal_newlines=True)
    except CalledProcessError:
        print("Error running get_docker_port by build_ctb_gene", file=sys.stderr)
        return None


def build_neo4j_url(port_output):
    """Turn the output of the port lookup into the Neo4j REST URL.

    *port_output* is whatever ``inspect_docker`` returned: text (or bytes)
    holding the host port, usually followed by a newline, or ``None`` when
    the lookup failed.  Returns ``None`` when no port can be extracted.
    """
    if port_output is None:
        return None
    if isinstance(port_output, bytes):
        port_output = port_output.decode()
    port = port_output.strip()
    if not port:
        return None
    return 'http://localhost:{}/db/data/'.format(port)


class BuildCtbRunner(object):
    """Drives the docker container and the ``build_ctb_gene`` run."""

    def __init__(self, args=None):
        """Store the parsed command-line *args* and pick a container name.

        ``args`` is expected to provide ``input_file`` and ``outputdir``.
        ``mount_point`` stays ``None`` until ``docker_run`` is called.
        """
        self.args = args
        self.mount_point = None
        self.docker_instance_name = "build_ctb_gene_" + str(random.randrange(0, 1000, 2))

    def build_ctb_gene(self):
        """Run ``build_ctb_gene goterms`` on the input file.

        On success the database files are copied to the output directory and
        a short summary is printed.  A non-zero exit status is reported on
        stderr and nothing is copied.
        """
        # Pass the arguments as a list so a path containing whitespace is
        # not split into several arguments.
        cmd = ["build_ctb_gene", "goterms", str(self.args.input_file)]
        try:
            check_call(cmd)
        except CalledProcessError:
            print("Error running the build_ctb_gene goterms", file=sys.stderr)
            return

        self.copy_output_file_to_dataset()
        print("Building a new DB, current time: %s" % str(datetime.date.today()))
        print("GFF File - Input: %s" % str(self.args.input_file))

    def newSplit(self, value):
        """Split *value* on whitespace, keeping double-quoted parts together.

        The quotes themselves are kept in the returned tokens, and ``#`` is
        not treated as a comment character.
        """
        lex = shlex.shlex(value)
        lex.quotes = '"'
        lex.whitespace_split = True
        lex.commenters = ''
        return list(lex)

    def copy_output_file_to_dataset(self):
        """Copy everything inside ``<mount_point>/graph.db`` to the output dir.

        Regular files are copied with ``shutil.copy2``; directories are
        copied recursively under their own name.
        """
        graph_db = os.path.join(self.mount_point, "graph.db")
        for entry in glob.glob(os.path.join(graph_db, '*')):
            if os.path.isfile(entry):
                shutil.copy2(entry, self.args.outputdir)
            elif os.path.isdir(entry):
                # Build the destination explicitly rather than chdir-ing
                # into the output directory, so the process working
                # directory is left untouched.
                target = os.path.join(self.args.outputdir, os.path.basename(entry))
                shutil.copytree(entry, target)

    def _docker_stop_cmd(self):
        """Return the argument list that stops this runner's container."""
        return ["docker", "stop", self.docker_instance_name]

    def _docker_run_cmd(self):
        """Return the argument list that starts this runner's container."""
        return [
            "docker", "run", "-d", "-P",
            "-v", "{}:/data".format(self.mount_point),
            "-e", "NEO4J_AUTH=none",
            "--name", self.docker_instance_name,
            "thoba/neo4j_galaxy_ie",
        ]

    def docker_stop(self):
        """Stop the container started by ``docker_run``.

        A non-zero exit status from docker is reported on stderr.
        """
        try:
            check_call(self._docker_stop_cmd())
        except CalledProcessError:
            print("Error running docker stop build_ctb_gene", file=sys.stderr)

    def docker_run(self):
        """Start the Neo4j container with ``<cwd>/neo4j/data`` mounted at /data.

        Sets ``self.mount_point`` as a side effect.  A non-zero exit status
        from docker is reported on stderr.
        """
        self.mount_point = "{}/neo4j/data".format(os.getcwd())
        try:
            check_call(self._docker_run_cmd())
        except CalledProcessError:
            print("Error running docker run by build_ctb_gene", file=sys.stderr)


def main():
    """Parse the command line, start Neo4j in docker and build the database."""
    parser = argparse.ArgumentParser(description="Tool used to extract data about genes using locus_tags")
    parser.add_argument('--outputdir', required=True)
    parser.add_argument('--input_file', required=True)
    args = parser.parse_args()

    ctb_gene_runner = BuildCtbRunner(args)

    # boot up a neo4j docker container
    ctb_gene_runner.docker_run()

    # get the host port docker published for the container's 7474/tcp
    cmd_str = "docker inspect --format='{{(index (index .NetworkSettings.Ports \"7474/tcp\") 0).HostPort}}' %s" % ctb_gene_runner.docker_instance_name

    # TODO: randomise the ports/names/mount_point and use the autokill image
    neo4j_url = build_neo4j_url(inspect_docker(cmd_str))
    if neo4j_url is None:
        # Without a port there is no database to talk to; stop here with a
        # clear message and a non-zero exit status.
        sys.exit("Could not determine the Neo4j port of container %s"
                 % ctb_gene_runner.docker_instance_name)

    try:
        os.environ["NEO4J_REST_URL"] = neo4j_url
    except (OSError, ValueError) as e:
        print("Error setting the NEO4J db environmental values", e, file=sys.stderr)

    # make the output directory
    if not os.path.exists(args.outputdir):
        os.makedirs(args.outputdir)

    # Fixed wait before the build; presumably gives Neo4j time to start
    # accepting connections.
    time.sleep(60)

    ctb_gene_runner.build_ctb_gene()
    # NOTE(review): the container is left running -- docker_stop() is never
    # called. Confirm whether it should be stopped once the files are copied.


if __name__ == "__main__":
    main()