Mercurial > repos > bgruening > data_manager_diamond_database_builder
changeset 7:52bf3ce034ef draft
Uploaded
author | bgruening |
---|---|
date | Sun, 08 Feb 2015 03:42:18 -0500 |
parents | ad69009da1b5 |
children | 571090e22eb5 |
files | data_manager/data_manager_diamond_database_builder.py |
diffstat | 1 files changed, 23 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/data_manager/data_manager_diamond_database_builder.py	Sat Feb 07 22:39:21 2015 -0500
+++ b/data_manager/data_manager_diamond_database_builder.py	Sun Feb 08 03:42:18 2015 -0500
@@ -6,6 +6,7 @@
 import optparse
 import urllib2
 #import uuid
+import subprocess
 from ftplib import FTP
 import tarfile
 import zipfile
@@ -74,7 +75,7 @@
             break
 
     if not ucsc_file_name:
-        raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ucsc_dbkey, path_contents ) )
+        raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ncbi_identifier, path_contents ) )
 
     tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' )
     ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ncbi_identifier, ext ) )
@@ -149,7 +150,7 @@
 def _stream_fasta_to_file( fasta_stream, target_directory, database_id, database_name, params, close_stream=True ):
     fasta_base_filename = "%s.fa" % database_id
     fasta_filename = os.path.join( target_directory, fasta_base_filename )
-    fasta_writer = open( fasta_filename, 'wb+' )
+    fasta_writer = open( fasta_base_filename, 'wb+' )
 
     if isinstance( fasta_stream, list ) and len( fasta_stream ) == 1:
         fasta_stream = fasta_stream[0]
@@ -177,10 +178,27 @@
                 break
         if close_stream:
             fasta_stream.close()
-
+
     fasta_writer.close()
-
-    return dict( value=database_id, name=database_name, path=fasta_base_filename )
+
+    args = [ 'diamond', 'makedb', '--in', fasta_base_filename, '--db', fasta_filename]
+
+    tmp_stderr = tempfile.NamedTemporaryFile( prefix = "tmp-data-manager-diamond-database-builder-stderr" )
+    proc = subprocess.Popen( args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() )
+    return_code = proc.wait()
+    if return_code:
+        tmp_stderr.flush()
+        tmp_stderr.seek(0)
+        print >> sys.stderr, "Error building diamond databse:"
+        while True:
+            chunk = tmp_stderr.read( CHUNK_SIZE )
+            if not chunk:
+                break
+            sys.stderr.write( chunk )
+        sys.exit( return_code )
+    tmp_stderr.close()
+
+    return dict( value=database_id, name=database_name, path="%s.dmnd" % fasta_base_filename )
 
 
 def _create_symlink( input_filename, target_directory, database_id, database_name ):
```