diff data_manager/fetch_refseq.py @ 2:a4ee45e7237b draft

planemo upload for repository https://github.com/pvanheus/refseq_fasta_data_manager commit a0125981706495e0a8be4fafe2eb1af3f0cfdaa3-dirty
author sanbi-uwc
date Fri, 07 Sep 2018 18:08:23 -0400
parents cfe6cd521835
children 4852eb1a75e5
line wrap: on
line diff
--- a/data_manager/fetch_refseq.py	Fri Sep 07 17:40:42 2018 -0400
+++ b/data_manager/fetch_refseq.py	Fri Sep 07 18:08:23 2018 -0400
@@ -3,6 +3,7 @@
 from __future__ import print_function, division
 import argparse
 from datetime import date
+import functools
 import gzip
 import json
 from multiprocessing import Process, Queue
@@ -52,11 +53,10 @@
             if debug:
                 print('Reading', input_filename, file=sys.stderr)
             with gzip.open(input_filename) as input_file:
-                data = input_file.read(chunk_size)
-                while data != '':
+                read_chunk = functools.partial(input_file.read, (chunk_size))
+                for data in iter(read_chunk, ''):  # use '' as a sentinel to stop the loop
                     output_file.write(data)
-                    data = input_file.read(chunk_size)
-            # os.unlink(input_filename)
+            os.unlink(input_filename)
             input_filename = conn.get()
 
 def get_refseq_division(division_name, mol_types, output_directory, debug=False, compress=False):