# HG changeset patch # User bgruening # Date 1423384938 18000 # Node ID 52bf3ce034efe00e73bc6002b1240ab46ee032eb # Parent ad69009da1b52dad820351f69faa3b7fbe81bb63 Uploaded diff -r ad69009da1b5 -r 52bf3ce034ef data_manager/data_manager_diamond_database_builder.py --- a/data_manager/data_manager_diamond_database_builder.py Sat Feb 07 22:39:21 2015 -0500 +++ b/data_manager/data_manager_diamond_database_builder.py Sun Feb 08 03:42:18 2015 -0500 @@ -6,6 +6,7 @@ import optparse import urllib2 #import uuid +import subprocess from ftplib import FTP import tarfile import zipfile @@ -74,7 +75,7 @@ break if not ucsc_file_name: - raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ucsc_dbkey, path_contents ) ) + raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ncbi_identifier, path_contents ) ) tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' ) ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ncbi_identifier, ext ) ) @@ -149,7 +150,7 @@ def _stream_fasta_to_file( fasta_stream, target_directory, database_id, database_name, params, close_stream=True ): fasta_base_filename = "%s.fa" % database_id fasta_filename = os.path.join( target_directory, fasta_base_filename ) - fasta_writer = open( fasta_filename, 'wb+' ) + fasta_writer = open( fasta_base_filename, 'wb+' ) if isinstance( fasta_stream, list ) and len( fasta_stream ) == 1: fasta_stream = fasta_stream[0] @@ -177,10 +178,27 @@ break if close_stream: fasta_stream.close() - + fasta_writer.close() - - return dict( value=database_id, name=database_name, path=fasta_base_filename ) + + args = [ 'diamond', 'makedb', '--in', fasta_base_filename, '--db', fasta_filename] + + tmp_stderr = tempfile.NamedTemporaryFile( prefix = "tmp-data-manager-diamond-database-builder-stderr" ) + proc = subprocess.Popen( args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() ) + return_code = proc.wait() + if return_code: + tmp_stderr.flush() + tmp_stderr.seek(0) + print >> sys.stderr, "Error building diamond databse:" + while True: + chunk = tmp_stderr.read( CHUNK_SIZE ) + if not chunk: + break + sys.stderr.write( chunk ) + sys.exit( return_code ) + tmp_stderr.close() + + return dict( value=database_id, name=database_name, path="%s.dmnd" % fasta_base_filename ) def _create_symlink( input_filename, target_directory, database_id, database_name ):