changeset 10:a7cd51b60f58 draft

Uploaded
author trinity_ctat
date Mon, 09 Jul 2018 13:15:58 -0400
parents 1717c42112ed
children 57428396c6e4
files data_manager/add_ctat_resource_lib.py data_manager/add_ctat_resource_lib.xml
diffstat 2 files changed, 127 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/add_ctat_resource_lib.py	Sat Jun 23 16:06:17 2018 -0400
+++ b/data_manager/add_ctat_resource_lib.py	Mon Jul 09 13:15:58 2018 -0400
@@ -506,6 +506,7 @@
             # even though no error has occurred. We will depend on error code return in order
             # to know if an error occurred.
             command += " 2>&1"
+            print "About to run the following command:\n\t{:s}".format(command)
             try: # to send the prep_genome_lib command.
                 command_output = subprocess.check_call(command, shell=True)
             except subprocess.CalledProcessError:
@@ -688,32 +689,49 @@
     return genome_name
 
 def main():
-    #Parse Command Line
+    #Parse Command Line. There are three basic ways to use this tool.
+    # 1) Download and Build the CTAT Genome Resource Library from an archive.
+    # 2) Build the library from source data files that are already downloaded.
+    # 3) Specify the location of an already built library.
+    # Any of these methods can incorporate or be followed by a gmap build.
+    # Choose arguments for only one method.
+    # Do not use arguments in a mixed manner. I am not writing code to handle that at this time.
     parser = argparse.ArgumentParser()
-    parser.add_argument('-s', '--source_url', default='', \
-        help='This is the url of a file with the data. ' + \
-            'They come from https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.')
-    parser.add_argument('-n', '--display_name', default='', \
-        help='Is used as the display name for the entry of this Genome Resource Library in the data table.')
+    # Arguments for all methods:
     parser.add_argument('-o', '--output_filename', \
         help='Name of the output file, where the json dictionary will be written.')
-    parser.add_argument('-d', '--force_download', \
-        help='Forces download of the Genome Resource Library, even if previously downloaded.', action='store_true')
-    parser.add_argument('-b', '--build', \
-        help='Forces build/rebuild the Genome Resource Library, even if previously built. ' + \
-             'Must have downloaded source_data for this to work.', action='store_true')
+    parser.add_argument('-y', '--display_name', default='', \
+        help='Is used as the display name for the entry of this Genome Resource Library in the data table.')
     parser.add_argument('-g', '--gmap_build', \
         help='Must be selected if you want the library to be gmapped. ' + \
              'Will force gmap_build of the Genome Resource Library, even if previously gmapped.', action='store_true')
-    parser.add_argument('-m', '--download_mutation_indexes', default='', \
+    parser.add_argument('-m', '--download_mutation_indexes_url', default='', \
         help='Set to the url of the mutation indexes for the Library. ' + \
              'Will download mutation indexes into the Genome Resource Library.', action='store_true')
-    parser.add_argument('-f', '--force_mutation_indexes_download', \
+    parser.add_argument('-i', '--new_mutation_indexes_download', \
         help='Forces the mutation indexes to download, ' + \
              'even if previously downloaded to this Library.', action='store_true')
-    requiredNamed = parser.add_argument_group('required named arguments')
-    requiredNamed.add_argument('-p', '--destination_path', required=True, \
-        help='Full path of the CTAT Resource Library location or destination, either where it is, or where it will be placed.')
+    # Method 1) arguments - Download and Build.
+    download_and_build_args = parser.add_argument_group('Download and Build arguments')
+    download_and_build_args.add_argument('-u', '--download_url', default='', \
+        help='This is the url of am archive file containing the library files. ' + \
+            'These are located at https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.')
+    download_and_build_args.add_argument('-d', '--download_location', default='', \
+        help='Full path of the CTAT Resource Library download location, where the download will be placed. If the archive file has already had been successfully downloaded, it will only be downloaded again if --new_download is selected.')
+    download_and_build_args.add_argument('-a', '--new_archive_download', \
+        help='Forces download of the Genome Resource Library, even if previously downloaded to the download_destination.', action='store_true')
+    # Method 2) arguments - Specify location of source and build.
+    specify_source_and_build_args = parser.add_argument_group('Specify Source and Build arguments')
+    specify_source_and_build_args.add_argument('-s', '--source_location', default='', \
+        help='Full path to the location of CTAT Resource Library source files. The --build_location must also be set.')
+    specify_source_and_build_args.add_argument('-r', '--rebuild', \
+        help='Forces build/rebuild the CTAT Genome Resource Library, even if previously built. ' + \
+             'Must specify location of the source_data for this to work.', action='store_true')
+    # Method 3) arguments - Specify the location of a built library.
+    built_lib_location_arg = parser.add_argument_group('Specify location of built library arguments')
+    built_lib_location_arg.add_argument('-b', '--build_location', default='', \
+        help='Full path to the location of a built CTAT Genome Resource Library, either where it is, or where it will be placed.')
+
     args = parser.parse_args()
 
     # All of the input parameters are written by default to the output file prior to
@@ -724,7 +742,7 @@
     # target_directory = params['output_data'][0]['extra_files_path']
     # os.mkdir(target_directory)
 
-    print "The value of source_url argument is:\n\t{:s}".format(str(args.source_url))
+    print "The value of download_url argument is:\n\t{:s}".format(str(args.download_url))
 
     # FIX - not sure lib_was_downloaded actually serves a purpose...
     # The original intent was to check whether an attempted download actually succeeded before proceeding,
@@ -734,26 +752,48 @@
     # and does not re-download them.
     lib_was_downloaded = False
     lib_was_built = False
-    download_has_source_data = False
     downloaded_directory = None
+    source_data_directory = None
     genome_build_directory = None
     # FIX - need to make sure we are handling all "possible" combinations of arguments.
     # Probably would be good if we could simplify/remove some of them.
     # But I think the current interface is using them all.
-    if (args.source_url != ""):
+
+    if (args.download_url != ""):
+        if (args.source_location):
+            raise ValueError("Argument --source_location cannot be used in combination with --download_url.")
+        if (args.build_location):
+            raise ValueError("Argument --build_location cannot be used in combination with --download_url.")
+        if (args.download_location is None) or (args.download_location == ""):
+            raise ValueError("Argument --download_url requires that --download_location be specified.")
         downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded = \
-            download_from_BroadInst(source=args.source_url, \
-                                    destination=args.destination_path, \
-                                    force_download=args.force_download)
+            download_from_BroadInst(source=args.download_url, \
+                                    destination=args.download_location, \
+                                    force_download=args.new_archive_download)
+        print "\nThe location of the downloaded_directory is {:s}.\n".format(str(downloaded_directory))
+        if download_has_source_data:
+            print "It is source data."
+            source_data_directory = downloaded_directory
+            if (genome_build_directory == None) or (genome_build_directory == ""):
+                raise ValueError("Programming Error: The location for building the genome_build_directory " + \
+                    "was not returned by download_from_BroadInst()")
+        else:
+            print "It is plug-n-play data."
+            genome_build_directory = search_for_genome_build_dir(downloaded_directory)
+    elif (args.source_location):
+        # Then the user wants to build the directory from the source data.
+        if (args.build_location is None) or (args.build_location == ""):
+            raise ValueError("Argument --source_location requires that --build_location be specified.")
+        source_data_directory = os.path.realpath(args.source_location)
+        genome_build_directory = os.path.realpath(args.build_location)
+        print "\nThe location of the source data is {:s}.\n".format(str(source_data_directory))
+    elif (args.build_location is not None) and (args.build_location != ""):
+        genome_build_directory = args.build_location
     else:
-        if (args.build):
-            # Then the user wants to build the directory from the data
-            # in the location that was given in destination_path.
-            downloaded_directory = args.destination_path
-        genome_build_directory = search_for_genome_build_dir(args.destination_path)
-
-    print "\nThe location of the downloaded_directory is {:s}.\n".format(str(downloaded_directory))
-    print "\nThe location of the CTAT Genome Resource Library is {:s}.\n".format(genome_build_directory)
+        raise ValueError("One of --download_url, --source_location, or --build_location must be specified.")
+        
+    print "\nThe location where the CTAT Genome Resource Library exists " + \
+        "or will be built is {:s}.\n".format(genome_build_directory)
 
     # FIX - We should leave a file indicating build success the same way we do for download success.
     # To take out builds for testing, comment out the lines that do the building.
@@ -761,17 +801,23 @@
     # That is why the gmap_build value is sent to build_the_library(), but if we are not building the
     # library, the user might still be asking for a gmap_build. That is done after rechecking for the
     # genome_build_directory.
-    if (downloaded_directory is not None) and (download_has_source_data or args.build):
-        build_the_library(downloaded_directory, genome_build_directory, True, args.gmap_build)
+    if (source_data_directory is not None):
+        build_the_library(source_data_directory, \
+                          genome_build_directory, \
+                          args.rebuild, \
+                          args.gmap_build)
         lib_was_built = True
-    elif downloaded_directory is None:
-        print "No directory was downloaded and there is no source data, " + \
-            "so the Resource Library was not built (it may already be built)."
+    elif genome_build_directory is None:
+        raise ValueError("No CTAT Genome Resource Library was downloaded, " + \
+            "there is no source data specified, " + \
+            "and no build location has been set. " + \
+            "This line of code should never execute.")
     # The following looks to see if the library actually exists after the build,
     # and raises an error if it cannot find the library files.
     # The reassignment of genome_build_directory should be superfluous, 
     # since genome_build_directory should already point to the correct directory,
-    # unless I made a mistake in the build code.
+    # unless I made a mistake somewhere above.
+
     genome_build_directory = search_for_genome_build_dir(genome_build_directory)
 
     if (args.gmap_build and not lib_was_built):
@@ -779,19 +825,21 @@
         # the user might still be asking for a gmap_build.
         gmap_the_library(genome_build_directory)
 
-    if (args.download_mutation_indexes != ""):
-        download_mutation_indexes(source_url=args.download_mutation_indexes, \
+    if (args.download_mutation_indexes_url != ""):
+        download_mutation_indexes(source_url=args.download_mutation_indexes_url, \
                                   genome_build_directory=genome_build_directory, \
-                                  force_download=args.force_mutation_indexes_download)
+                                  force_download=args.new_mutation_indexes_download)
 
     # Need to get the genome name.
-    genome_name = find_genome_name_in_path(args.source_url)
+    genome_name = find_genome_name_in_path(args.download_url)
     if genome_name is None:
         genome_name = find_genome_name_in_path(genome_build_directory)
     if genome_name is None:
         genome_name = find_genome_name_in_path(downloaded_directory)
     if genome_name is None:
-        genome_name = find_genome_name_in_path(args.destination_path)
+        genome_name = find_genome_name_in_path(args.source_location)
+    if genome_name is None:
+        genome_name = find_genome_name_in_path(args.download_location)
     if genome_name is None:
         genome_name = find_genome_name_in_path(args.display_name)
     if genome_name is None:
--- a/data_manager/add_ctat_resource_lib.xml	Sat Jun 23 16:06:17 2018 -0400
+++ b/data_manager/add_ctat_resource_lib.xml	Mon Jul 09 13:15:58 2018 -0400
@@ -9,27 +9,28 @@
     </requirements>
     <command detect_errors="default">
         <![CDATA[
-        python $__tool_directory__/add_ctat_resource_lib.py 
-            --display_name "${display_name}" 
-            --destination_path "${destination}" 
-            --output_filename "${out_file}" 
-            #if str( $genome_resource_library.download ) == "true":
-                --source_url "${genome_resource_library.source_url}" 
-                #if str( $genome_resource_library.force_download ) == "true":
-                    --force_download
-                #end if
+          python $__tool_directory__/add_ctat_resource_lib.py 
+            --output_filename="${out_file}" 
+            --display_name="${display_name}" 
+            #if str($genome_resource_library.build_type) == "download_and_build":
+              --download_url="${genome_resource_library.download_url}" 
+              --download_location="${genome_resource_library.download_destination}"
+              #if str($genome_resource_library.force_new_download) == "true":
+                --new_archive_download
+              #end if
             #end if
-            #if str( $rebuild ) == "true":
-                --build 
-            #end if
-            #if str( $gmap_build ) == "true":
-                --gmap_build 
+            #if str($genome_resource_library.build_type) == "build_from_source":
+              --source_location "${genome_resource_library.source_location}"
+              --build_location "${genome_resource_library.built_library_location}" 
+              #if str($genome_resource_library.rebuild) == "true":
+                --rebuild
+              #end if
             #end if
-            #if str( $mutation_indexes.download ) == "true":
-                --download_mutation_indexes "${mutation_indexes.source_url}" 
-                #if str( $mutation_indexes.force_download ) == "true":
-                    --force_mutation_indexes_download
-                #end if
+            #if str($genome_resource_library.build_type) == "specify_built_location":
+              --build_location="${genome_resource_library.built_library_location}"
+            #end if
+            #if str($gmap_build) == "true":
+              --gmap_build 
             #end if
         ]]>
     </command>
@@ -45,9 +46,12 @@
             </when>
         -->
         <conditional name="genome_resource_library">
-            <param name="download" type="boolean" checked="false" label="Download CTAT Genome Resource Library?">
+            <param name="build_type" type="select" label="Download CTAT Genome Resource Library?">
+                <option value="download_and_build" selected="true">Download from CTAT and build if needed</option>
+                <option value="build_from_source">Build library from local source data</option>
+                <option value="specify_built_location">Specify location of built library</option>
             </param>
-            <when value="true">
+            <when value="download_and_build">
                 <!-- The use of a code block to get dynamic options is now deprecated and discouraged.
                      I am still using it here. The only other way I can think of to do this is to
                      create another data_manager that gets the list of files and puts them into a
@@ -79,20 +83,25 @@
                         Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
                     </option>
                 -->
-                <param name="source_url" type="select" label="Select a File"
+                <param name="download_url" type="select" label="Select a File"
                     dynamic_options="get_ctat_genome_urls()" 
                     help="Select a CTAT Genome Resource Library to Download.">
                 </param>
-                <param name="force_download" type="boolean" checked="false" label="Force New Download?" />
+                <param name="download_destination" type="text" label="Download Destination (full path)" />
+                <param name="force_new_download" type="boolean" checked="false" label="Force New Download?" />
+            </when>
+            <when value="build_from_source">
+                <param name="source_location" type="text" label="Location of Source Files (full path)" />
+                <param name="built_library_location" type="text" label="Location of the Built Library (full path)" />
+                <param name="rebuild" type="boolean" checked="false" label="Force new build of Library?" />
+            </when>
+            <when value="specify_built_location">
+                <param name="built_library_location" type="text" label="Location of the Built Library (full path)" />
             </when>
         </conditional>
-
         <param name="display_name" type="text" label="Reference Genome Display Name" />
-        <param name="destination" type="text" label="Local Destination (full path)" />
-        <param name="rebuild" type="boolean" checked="false" label="Force rebuild of Library?" />
         <param name="gmap_build" type="boolean" checked="true" label="Do a gmap_build on the Library?" />
-        <!-- <param name="mutation_indexes" type="boolean" checked="true" label="Download mutation indexes into the Library?" />
-        -->
+        <!-- Below is the most recent interface for download of mutation indexes, but it is not being used yet...
         <conditional name="mutation_indexes">
             <param name="download" type="boolean" checked="true" label="Download mutation indexes into the Library?">
             </param>
@@ -104,6 +113,7 @@
                 <param name="force_download" type="boolean" checked="false" label="Force New Download?" />
             </when>
         </conditional>
+        -->
     </inputs>
     <outputs>
         <data name="out_file" format="data_manager_json" />