diff TrustStoreGalaxyImport.py @ 20:b94a8f55b1da

Un-gzip.
author Catherine Wise <catherine.wise@csiro.au>
date Mon, 13 Jan 2014 08:51:17 +1100
parents 159d2159e745
children 8cf18ca6f13f
line wrap: on
line diff
--- a/TrustStoreGalaxyImport.py	Fri Dec 13 14:39:58 2013 +1100
+++ b/TrustStoreGalaxyImport.py	Mon Jan 13 08:51:17 2014 +1100
@@ -1,6 +1,8 @@
 import sys
 import shutil
+import gzip
 from py_ts import TrustStoreClient, ts_utils
+from galaxy.datatypes.checkers import *
 
 def printNice(elem, f, depth):
     try:
@@ -54,7 +56,34 @@
                 downloadMe = ts_utils.ts_utils.recurseToChildNamed(location, filename)
                 if downloadMe:
                     download = ts.getFile(store, downloadMe)
-                    shutil.copy(download, outputFile)
+                    is_gzipped, is_valid = check_gzip(download)
+
+                    if is_gzipped and not is_valid:
+                        print "File is compressed (gzip) but not valid."
+                        sys.exit(4)
+                    elif is_gzipped and is_valid:
+                        if link_data_only == 'copy_files':
+                            # We need to uncompress the downloaded file, but BAM files must remain compressed in the BGZF format
+                            CHUNK_SIZE = 2**20 # 1 MiB
+                            fd, uncompressed = tempfile.mkstemp(prefix='data_id_%s_upload_gunzip_' % dataset.dataset_id, dir=os.path.dirname(outputFile), text=False )
+                            gzipped_file = gzip.GzipFile(download, 'rb')
+                            while 1:
+                                try:
+                                    chunk = gzipped_file.read(CHUNK_SIZE)
+                                except IOError:
+                                    os.close(fd)
+                                    os.remove(uncompressed)
+                                    print 'Problem decompressing gzipped data', dataset, json_file
+                                    sys.exit(4)
+                                if not chunk:
+                                    break
+                                os.write(fd, chunk)
+                            os.close(fd)
+                            gzipped_file.close()
+
+                        shutil.copy(uncompressed, outputFile)
+                    else:
+                        shutil.copy(download, outputFile)
                 else:
                     print "File not found"
                     sys.exit(4)