changeset 7:52bf3ce034ef draft

Uploaded
author bgruening
date Sun, 08 Feb 2015 03:42:18 -0500
parents ad69009da1b5
children 571090e22eb5
files data_manager/data_manager_diamond_database_builder.py
diffstat 1 files changed, 23 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/data_manager_diamond_database_builder.py	Sat Feb 07 22:39:21 2015 -0500
+++ b/data_manager/data_manager_diamond_database_builder.py	Sun Feb 08 03:42:18 2015 -0500
@@ -6,6 +6,7 @@
 import optparse
 import urllib2
 #import uuid
+import subprocess
 from ftplib import FTP
 import tarfile
 import zipfile
@@ -74,7 +75,7 @@
             break
 
     if not ucsc_file_name:
-        raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ucsc_dbkey, path_contents ) )
+        raise Exception( 'Unable to determine filename for UCSC Genome for %s: %s' % ( ncbi_identifier, path_contents ) )
 
     tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' )
     ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ncbi_identifier, ext ) )
@@ -149,7 +150,7 @@
 def _stream_fasta_to_file( fasta_stream, target_directory, database_id, database_name, params, close_stream=True ):
     fasta_base_filename = "%s.fa" % database_id
     fasta_filename = os.path.join( target_directory, fasta_base_filename )
-    fasta_writer = open( fasta_filename, 'wb+' )
+    fasta_writer = open( fasta_base_filename, 'wb+' )
 
     if isinstance( fasta_stream, list ) and len( fasta_stream ) == 1:
         fasta_stream = fasta_stream[0]
@@ -177,10 +178,27 @@
                 break
         if close_stream:
             fasta_stream.close()
-    
+
     fasta_writer.close()
-    
-    return dict( value=database_id, name=database_name, path=fasta_base_filename )
+
+    args = [ 'diamond', 'makedb', '--in', fasta_base_filename, '--db', fasta_filename]
+
+    tmp_stderr = tempfile.NamedTemporaryFile( prefix = "tmp-data-manager-diamond-database-builder-stderr" )
+    proc = subprocess.Popen( args=args, shell=False, cwd=target_directory, stderr=tmp_stderr.fileno() )
+    return_code = proc.wait()
+    if return_code:
+        tmp_stderr.flush()
+        tmp_stderr.seek(0)
+        print >> sys.stderr, "Error building diamond databse:"
+        while True:
+            chunk = tmp_stderr.read( CHUNK_SIZE )
+            if not chunk:
+                break
+            sys.stderr.write( chunk )
+        sys.exit( return_code )
+    tmp_stderr.close()
+
+    return dict( value=database_id, name=database_name, path="%s.dmnd" % fasta_base_filename )
 
 
 def _create_symlink( input_filename, target_directory, database_id, database_name ):