Mercurial > repos > trinity_ctat > ctat_genome_resource_libs_data_manager_3
changeset 35:4cff4dbd7d6b draft
Adding flush statements, so that directory listings will accompany their corresponding print statements.
author | trinity_ctat |
---|---|
date | Thu, 25 Oct 2018 15:35:48 -0400 |
parents | 9009e1a12afd |
children | e3059a4ccf96 |
files | data_manager/add_ctat_resource_lib.py |
diffstat | 1 files changed, 78 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/add_ctat_resource_lib.py Thu Oct 25 11:03:53 2018 -0400 +++ b/data_manager/add_ctat_resource_lib.py Thu Oct 25 15:35:48 2018 -0400 @@ -10,6 +10,7 @@ # Users can create or download other libraries and use this Data Manger to add them # if they don't want to add them by hand. +import sys import argparse import os import shutil @@ -166,6 +167,7 @@ # return a tuple of the urls print "The list being returned as options is:" print "{:s}\n".format(str(options)) + sys.stdout.flush() return options def get_mutation_resource_urls(): @@ -226,6 +228,7 @@ # return a tuple of the urls print "The list being returned as options is:" print "{:s}\n".format(str(options)) + sys.stdout.flush() return options # The following was used by the example program to get input parameters through the json. @@ -245,9 +248,11 @@ if num_levels > 0: if os.path.exists(dir_path) and os.path.isdir(dir_path): print "\nDirectory {:s}:".format(dir_path) + sys.stdout.flush() subprocess.call("ls -la {:s} 2>&1".format(dir_path), shell=True) else: print "Path either does not exist, or is not a directory:\n\t{:s}.".format(dir_path) + sys.stdout.flush() if num_levels > 1: if os.path.exists(dir_path) and os.path.isdir(dir_path): for filename in os.listdir(dir_path): @@ -256,6 +261,7 @@ print_directory_contents(filename_path, num_levels-1) else: print "Path either does not exist, or is not a directory:\n\t{:s}.".format(dir_path) + sys.stdout.flush() def which(file): # This procedure is similar to the linux "which" command. 
@@ -346,6 +352,7 @@ except IOError: print "The success indication file could not be created: " + \ "{:s}".format(full_file_path) + sys.stdout.flush() raise def download_file_from_url(file_url, dest_dir, resume_download=True): @@ -369,7 +376,8 @@ source_filesize = size_of_file_at(file_url) print "Downloading {:s}\nSize of the file is {:d}".format(file_url, source_filesize) print "Destination file for the download is {:s}".format(dest_fullpath) - + sys.stdout.flush() + # If the file exists and resume_download is requested, then only download the remainder if resume_download and os.path.exists(dest_fullpath): existing_size = os.path.getsize(dest_fullpath) @@ -386,11 +394,15 @@ output_file = open(dest_fullpath,"ab") else: if os.path.exists(dest_fullpath): - print "resume_download is set to False. Download will overwrite an existing file." + print "The destination file exists:\n\t{:s}".format(dest_fullpath) + print "However a new download has been requested." + print "The download will overwrite the existing file." else: print "The destination file does not exist yet." existing_size = 0 output_file = open(dest_fullpath,"wb") + sys.stdout.flush() + try: # Check whether there is enough space on the device for the rest of the file to download. 
statvfs = os.statvfs(dest_dir) @@ -415,6 +427,7 @@ source_file.close() except IOError: print "Error while attempting to download {:s}".format(file_url) + sys.stdout.flush() raise finally: output_file.close() @@ -424,6 +437,7 @@ print "Downloaded {:s} bytes from {:s}".format(str(bytes_read), str(file_url)) dest_filesize = os.path.getsize(dest_fullpath) print "{:s} {:s}".format(str(dest_filesize), str(dest_fullpath)) + sys.stdout.flush() if source_filesize != dest_filesize: raise IOError("Download error:\n\t" + \ "The source file\n\t\t{:d}\t{:s}\n\t".format(source_filesize, file_url) + \ @@ -452,6 +466,7 @@ except os.error: print "ERROR: Trying to create the following directory path:" print "\t{:s}".format(cannonical_destination) + sys.stdout.flush() raise # Make sure the directory now exists and we can write to it. if not os.path.exists(cannonical_destination): @@ -469,6 +484,7 @@ except IOError: print "The destination directory could not be written into:\n\t" + \ "{:s}".format(cannonical_destination) + sys.stdout.flush() raise # Check whether there are numbytes available on cannonical_destination's device. statvfs = os.statvfs(cannonical_destination) @@ -516,6 +532,7 @@ print "Downloading:\n\t{:s}".format(str(source_url)) print "to:\n\t{:s}".format(destination) + sys.stdout.flush() # The next is done so that if the source_url does not have a genome name in it, an error will be raised. find_genome_name_in_path(source_url, raise_error=True) cannonical_destination = ensure_we_can_write_numbytes_to(destination, size_of_file_at(source_url)) @@ -547,6 +564,7 @@ md5sum_from_file = md5sum_for(dest_fullpath) except IOError: print "Error while attempting to check the md5sum for {:s}".format(dest_fullpath) + sys.stdout.flush() raise if md5sum_from_web != md5sum_from_file: raise IOError("Download error:\n\t" + \ @@ -565,12 +583,15 @@ dest_fullpath = os.path.join(cannonical_destination, dest_filename) else: print "download_genome_archive(): This code should never be printed. 
Something is wrong." - + sys.stdout.flush() + # Some code to help us if errors occur. print "\n*******************************" print "* Finished download. *" + sys.stdout.flush() print_directory_contents(cannonical_destination, 1) print "*******************************\n" + sys.stdout.flush() return dest_fullpath @@ -606,12 +627,15 @@ print "Remove the success file or set <force new extraction> if you want a new extraction to occur." else: print "extract_archive(): This code should never be printed. Something is wrong." + sys.stdout.flush() # Some code to help us if errors occur. print "\n*******************************************************" print "* Finished extraction. Destination directory listing. *" + sys.stdout.flush() print_directory_contents(cannonical_destination, 1) print "*******************************************************\n" + sys.stdout.flush() return def extract_genome_file(archive_filepath, destination, force_new_extraction=False, keep_archive=False): @@ -626,6 +650,7 @@ print "Extracting:\n\t {:s}".format(str(archive_filepath)) print "to:\n\t{:s}".format(destination) + sys.stdout.flush() cannonical_destination = ensure_we_can_write_numbytes_to(destination, bytes_needed_to_extract(archive_filepath)) # Get the root filename of the Genome Directory from the source file's name. # That should also be the name of the extracted directory. @@ -670,6 +695,7 @@ # We are done extracting, so remove the archive file. if os.path.exists(archive_filepath): print "Removing the archive file:\n\t{:s}".format(archive_filepath) + sys.stdout.flush() os.remove(archive_filepath) # else: # It was removed previously, so we don't need to remove it again. return extracted_directory @@ -700,12 +726,15 @@ subprocess.check_call(command, shell=True) except subprocess.CalledProcessError: print "ERROR: While trying to run the gmap_build command on the library:\n\t{:s}".format(command) + sys.stdout.flush() raise finally: # Some code to help us if errors occur. 
print "\n*******************************\nAfter running gmap_build." + sys.stdout.flush() print_directory_contents(genome_build_directory, 2) print "*******************************\n" + sys.stdout.flush() create_success_file(gmap_success_full_file_path, \ "gmap of:\n\t{:s}\nsucceeded.".format(genome_build_directory)) elif gmap_success_filename in orig_files_in_build_dir: @@ -714,6 +743,7 @@ print "Remove the file or set <force new gmap> if you want a new gmap to occur." else: print "gmap_the_library(): This code should never be printed. Something is wrong." + sys.stdout.flush() return @@ -745,7 +775,8 @@ bytes_needed_to_build(genome_source_directory)) print "Building the CTAT Genome Resource Library from source data at:\n\t{:s}".format(str(genome_source_directory)) print "The Destination directory is at:\n\t{:s}".format(str(cannonical_destination)) - + sys.stdout.flush() + # Get the root filename of the Genome Directory. src_filename = os.path.basename(genome_source_directory) # See whether the library has been built already. The success file is written into the source directory. @@ -785,6 +816,7 @@ # to know if an error occurred. command += " 2>&1" print "About to run the following command:\n\t{:s}".format(command) + sys.stdout.flush() try: # to send the prep_genome_lib command. subprocess.check_call(command, shell=True) except subprocess.CalledProcessError: @@ -795,10 +827,13 @@ # Some code to help us if errors occur. 
print "\n*******************************" print "Contents of Genome Source Directory {:s}:".format(genome_source_directory) + sys.stdout.flush() print_directory_contents(genome_source_directory, 2) print "\nContents of Genome Build Directory {:s}:".format(cannonical_destination) + sys.stdout.flush() print_directory_contents(cannonical_destination, 2) print "*******************************\n" + sys.stdout.flush() create_success_file(build_success_file_path, \ "Build of:\n\t{:s}\n".format(genome_source_directory) + \ "to:\n\t{:s}\nsucceeded.".format(cannonical_destination)) @@ -819,7 +854,7 @@ # gmap_the_library creates a gmap success file if it succeeds. else: print "build_the_library(): This code should never be printed. Something is wrong." - + sys.stdout.flush() return # End of build_the_library() @@ -914,6 +949,7 @@ print "\n***********************************" print "* Integrating Mutation Resources. *" print "***********************************\n" + sys.stdout.flush() # It is assumed that this procedure is only called with a valid genome_build_directory. url_parts = urlparse.urlparse(source_url) source_filename = os.path.basename(url_parts.path) @@ -926,6 +962,7 @@ print "Download and Integrate a Mutation Resource Archive." print "The source URL is:\n\t{:s}".format(str(source_url)) print "The destination is:\n\t{:s}".format(str(cannonical_destination)) + sys.stdout.flush() # Get the list of files in the directory, # We use it to check for a previous download or extraction among other things. orig_files_in_destdir = set(os.listdir(cannonical_destination)) @@ -952,6 +989,7 @@ print "Remove the file or set <new_mutation_download> if you want a new download to occur." else: print "download_and_integrate_mutation_resources() - Download: This code should never be printed. Something is wrong." 
+ sys.stdout.flush() # INTEGRATION SECTION integration_success_file = "{:s}.{:s}".format(source_filename, _MutationIntegrationSuccessFile) @@ -994,13 +1032,16 @@ subprocess.check_call(command, shell=True) except subprocess.CalledProcessError: print "ERROR: While trying to integrate the mutation resources:\n\t{:s}".format(command) + sys.stdout.flush() raise finally: # Some code to help us if errors occur. print "/n*********************************************************" print "* After download and integration of Mutation Resources. *" + sys.stdout.flush() print_directory_contents(cannonical_destination, 2) print "*********************************************************\n" + sys.stdout.flush() create_success_file(integration_success_file_path, \ "Download and integration of mutation resources:\n\t{:s}\n".format(source_url) + \ "to:\n\t{:s}\nsucceeded.".format(genome_build_directory)) @@ -1010,6 +1051,7 @@ print "Remove the file or set <new_mutation_integration> if you want a new integration to occur." else: print "download_and_integrate_mutation_resources() - Integration: This code should never be printed. Something is wrong." + sys.stdout.flush() return def search_for_genome_build_dir(top_dir_path): @@ -1037,15 +1079,18 @@ "The given directory is not a directory:\n\t{:s}".format(top_dir_full_path)) if os.path.basename(top_dir_full_path) == _CTAT_Build_dirname: print "Build directory is: {:s}".format(top_dir_full_path) + sys.stdout.flush() # The top_dir_path is the path to the genome_build_directory. genome_build_directory = top_dir_full_path else: # Look for it inside of the top_dir_path directory. print "Looking inside of: {:s}".format(top_dir_full_path) + sys.stdout.flush() top_dir_contents = os.listdir(top_dir_full_path) if (_CTAT_Build_dirname in top_dir_contents): # The genome_build_directory is inside of the top_dir_path directory. print "1. Found it." 
+ sys.stdout.flush() genome_build_directory = "{:s}/{:s}".format(top_dir_full_path,_CTAT_Build_dirname) else: # Find all subdirectories containing the _CTAT_Build_dirname or the _CTAT_RefGenome_Filename. @@ -1085,45 +1130,56 @@ print "\n***************************************" print "Found multiple CTAT Genome Resource Libraries " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path) + sys.stdout.flush() print_directory_contents(top_dir_full_path, 2) print "***************************************\n" + sys.stdout.flush() raise ValueError("Found multiple CTAT Genome Resource Libraries " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path)) elif len(build_dirs_in_subdirs) == 1: # The genome_build_directory is inside of the subdir_path directory. print "2b, Found it." + sys.stdout.flush() genome_build_directory = build_dirs_in_subdirs[0] elif len(build_dirs_in_sub_subdirs) == 1: # The genome_build_directory is inside of the subdir_path directory. print "3b, Found it." + sys.stdout.flush() genome_build_directory = build_dirs_in_sub_subdirs[0] elif (len(sub_subdirs_with_genome_files) + len(subdirs_with_genome_files)) > 1: print "\n***************************************" print "Unable to find CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path) print "And multiple directories contain {:s}".format(_CTAT_RefGenome_Filename) + sys.stdout.flush() print_directory_contents(top_dir_full_path, 2) print "***************************************\n" + sys.stdout.flush() raise ValueError("Unable to find CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path)) elif (len(sub_subdirs_with_genome_files) == 1): print "3c, Maybe found it." + sys.stdout.flush() genome_build_directory = sub_subdirs_with_genome_files[0] print_warning = True elif (len(subdirs_with_genome_files) == 1): print "2c, Maybe found it." 
+ sys.stdout.flush() genome_build_directory = subdirs_with_genome_files[0] print_warning = True elif (_CTAT_RefGenome_Filename in top_dir_contents): print "1c. Maybe found it." + sys.stdout.flush() genome_build_directory = top_dir_full_path print_warning = True else: print "\n***************************************" print "Unable to find CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path) + sys.stdout.flush() print_directory_contents(top_dir_full_path, 2) print "***************************************\n" + sys.stdout.flush() raise ValueError("Unable to find CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path)) # end else @@ -1132,8 +1188,10 @@ print "\n***************************************" print "Cannot find the CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path) + sys.stdout.flush() print_directory_contents(top_dir_full_path, 2) print "***************************************\n" + sys.stdout.flush() raise ValueError("Cannot find the CTAT Genome Resource Library " + \ "in the given directory:\n\t{:s}".format(top_dir_full_path)) else: @@ -1141,15 +1199,19 @@ print "\n***************************************" print "\nWARNING: Cannot find Genome Reference file {:s} ".format(_CTAT_RefGenome_Filename) + \ "in the genome build directory:\n\t{:s}".format(genome_build_directory) + sys.stdout.flush() print_directory_contents(genome_build_directory, 2) print "***************************************\n" + sys.stdout.flush() if print_warning and genome_build_directory: print "\n***************************************" print "\nWARNING: Cannot find the CTAT Genome Resource Library, " + \ "but found a {:s} file, so set its directory as the library.".format(_CTAT_RefGenome_Filename) print "This my not be the correct directory:\n\t{:s}".format(genome_build_directory) + sys.stdout.flush() print_directory_contents(genome_build_directory, 2) print 
"***************************************\n" + sys.stdout.flush() return genome_build_directory def build_directory_from_build_location(src_filename, build_location): @@ -1266,7 +1328,8 @@ # os.mkdir(target_directory) print "The value of download_url argument is:\n\t{:s}".format(str(args.download_url)) - + sys.stdout.flush() + lib_was_built = False extracted_directory = None source_data_directory = None @@ -1289,10 +1352,11 @@ destination=args.download_location, \ force_new_download=args.new_archive_download) print "\nThe downloaded file is:\n\t{:s}.\n".format(str(downloaded_filename_full_path)) - + sys.stdout.flush() if ctat_library_type(downloaded_filename_full_path) == _LIBTYPE_SOURCE_DATA: print "It is source data." + sys.stdout.flush() # If it is source_data, extract to download_location (the directory where the download was placed). extracted_directory = extract_genome_file(archive_filepath=downloaded_filename_full_path, \ destination=args.download_location, \ @@ -1309,6 +1373,7 @@ elif ctat_library_type(downloaded_filename_full_path) == _LIBTYPE_PLUG_N_PLAY: print "It is plug-n-play data." + sys.stdout.flush() if build_location_is_set: # Extract to the build location. The library is already built. extracted_directory = extract_genome_file(archive_filepath=downloaded_filename_full_path, \ @@ -1338,7 +1403,8 @@ elif source_location_is_set: # Then the user wants to build the directory from the source data. 
source_data_directory = os.path.realpath(args.source_location) - print "\nThe user is saying the source data is in:\n\t{:s}.\n".format(str(source_data_directory)) + print "\nThe program is being told that the source data is in:\n\t{:s}.\n".format(str(source_data_directory)) + sys.stdout.flush() if build_location_is_set: genome_build_directory = build_directory_from_build_location(source_data_directory, args.build_location) else: @@ -1354,6 +1420,7 @@ print "\nThe location where the CTAT Genome Resource Library exists " + \ "or will be built is {:s}.\n".format(str(genome_build_directory)) + sys.stdout.flush() # To take out builds for testing, comment out the lines that do the building. # The command that builds the ctat genome library also has an option for building the gmap indexes. @@ -1402,6 +1469,7 @@ if genome_name is None: genome_name = _CTAT_ResourceLib_DefaultGenome print "WARNING: We could not find a genome name in any of the directory paths." + sys.stdout.flush() # Determine the display_name for the library. if (args.display_name is None) or (args.display_name == ""): @@ -1418,6 +1486,7 @@ print "The Genome Resource Library's display_name will be set to: {:s}\n".format(display_name) print "Its unique_id will be set to: {:s}\n".format(unique_id) print "Its dir_path will be set to: {:s}\n".format(genome_build_directory) + sys.stdout.flush() data_manager_dict = {} data_manager_dict['data_tables'] = {} @@ -1427,6 +1496,7 @@ # Temporarily the output file's dictionary is written for debugging: print "The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict)) + sys.stdout.flush() # Save info to json file. This is used to transfer data from the DataManager tool, to the data manager, # which then puts it into the correct .loc file (I think). # Comment out the following line when testing without galaxy package.