0
|
1 #!/usr/bin/env python
|
|
2 # ref: https://galaxyproject.org/admin/tools/data-managers/how-to/define/
|
|
3
|
|
4 # Rewritten by H.E. Cicada Brokaw Dennis from a source downloaded from the toolshed and
|
|
5 # other example code on the web.
|
|
6 # This now allows downloading of a user selected library
|
|
7 # but only from the CTAT Genome Resource Library website.
|
|
8 # Ultimately we might want to allow the user to specify any location
|
|
9 # from which to download.
|
|
10 # Users can create or download other libraries and use this tool to add them if they don't want
|
|
11 # to add them by hand.
|
|
12
|
|
13 import argparse
|
|
14 import os
|
|
15 #import tarfile
|
|
16 #import urllib
|
|
17 import subprocess
|
|
18
|
|
19 # Comment out the following line when testing without galaxy package.
|
|
20 from galaxy.util.json import to_json_string
|
|
21 # The following is not being used, but leaving as info
|
|
22 # in case we ever want to get input values using json.
|
|
23 # from galaxy.util.json import from_json_string
|
|
24
|
|
25 # datetime.now() is used to create the unique_id
|
|
26 from datetime import datetime
|
|
27
|
|
28 # The FileListParser is used by get_ctat_genome_filenames(),
|
|
29 # which is called by the Data Manager interface (.xml file) to get
|
|
30 # the filenames that are available online at broadinstitute.org
|
|
31 # Not sure best way to do it.
|
|
32 # This object uses HTMLParser to look through the html
|
|
33 # searching for the filenames within anchor tags.
|
|
34 import urllib2
|
|
35 from HTMLParser import HTMLParser
|
|
36
|
|
# Web directory whose index page lists the downloadable library archives.
_CTAT_ResourceLib_URL = "https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/"
# Name used for the build directory when the download does not provide one.
_CTAT_BuildDir_Name = "ctat_genome_lib_build_dir"
# Prefix put in front of every display name written to the data table.
_CTAT_ResourceLib_DisplayNamePrefix = "CTAT_GenomeResourceLib_"
# Genome name used when none can be derived from the arguments.
_CTAT_ResourceLib_DefaultGenome = "Unspecified_Genome"
# Free space required before a download is attempted: 60 gigabytes.
# FIX - This might not be correct.
_NumBytesNeededForBuild = 60 * 1024 ** 3
# Scratch file written (and removed) to prove the destination is writable.
_Download_TestFile = "write_testfile.txt"
# Marker file left in the destination once a download has succeeded.
_DownloadSuccessFile = "download_succeeded.txt"
|
|
44
|
|
class FileListParser(HTMLParser):
    """Collect the hrefs of tar.gz archives referenced by anchor tags.

    Feed an HTML page to the parser with feed(); afterwards ``urls`` is a set
    holding every ``href`` value of an ``<a>`` tag that contains "tar.gz" and
    does not contain "md5" (which skips the checksum files).
    """

    def __init__(self):
        # Call the base initializer directly rather than through super():
        # under Python 2 HTMLParser is an "old style" class whose inheritance
        # chain does not include object, so super() cannot be used.
        HTMLParser.__init__(self)
        self.urls = set()

    def handle_starttag(self, tag, attrs):
        """Record the href of an anchor tag when it names a tar.gz archive."""
        if tag != "a":
            return
        for name, value in attrs:
            # An attribute written without a value (e.g. <a href>) is reported
            # with value None; testing "tar.gz" in None would raise TypeError.
            if name != "href" or not value:
                continue
            if "tar.gz" in value and "md5" not in value:
                self.urls.add(value)
# End of class FileListParser
|
|
64
|
|
def get_ctat_genome_urls():
    """Return option tuples for the library archives listed on the CTAT site.

    The page at _CTAT_ResourceLib_URL is fetched and scanned with
    FileListParser. The result is a list of 3-tuples in the form needed for
    dynamic options:
        item one   - the display name put into the option list (the filename),
        item two   - the value associated with the option (the href as found),
        item three - True for the selected option, False otherwise.
    Archives whose filename starts with "Mouse_" are left out for now, because
    the mouse genome option is not handled correctly yet.
    """
    resource = urllib2.urlopen(_CTAT_ResourceLib_URL)
    try:
        page_html = resource.read()
    finally:
        # Release the connection even if reading fails.
        resource.close()
    filelist_parser = FileListParser()
    filelist_parser.feed(page_html)

    options = []
    # The parser stores the urls in a set, which has no defined order; sort
    # them so the option list is the same on every call.
    for url in sorted(filelist_parser.urls):
        # The urls look like:
        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
        # https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/Mouse_M16_CTAT_lib_Feb202018.source_data.tar.gz
        filename = url.split("/")[-1]
        if filename.split("_")[0] == "Mouse":
            continue
        # Select the first option that is actually offered. Deciding this from
        # the position in the unfiltered list left nothing selected whenever
        # the first url was a filtered-out Mouse archive.
        options.append((filename, url, not options))
    return options
|
|
87
|
|
88 # The following was used by the example program to get input parameters through the json.
|
|
89 # Just leaving here for reference.
|
|
90 # We are getting all of our parameter values through command line arguments.
|
|
91 #def get_reference_id_name(params):
|
|
92 # genome_id = params['param_dict']['genome_id']
|
|
93 # genome_name = params['param_dict']['genome_name']
|
|
94 # return genome_id, genome_name
|
|
95 #
|
|
96 #def get_url(params):
|
|
97 # trained_url = params['param_dict']['trained_url']
|
|
98 # return trained_url
|
|
99
|
|
def download_from_BroadInst(source, destination, force_download):
    """Download a library archive and extract it into the destination directory.

    Input Parameters
        source is the full URL of the file we want to download.
            It should look something like:
            https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/GRCh37_v19_CTAT_lib_Feb092018.plug-n-play.tar.gz
        destination is the location where the source file will be unarchived.
            Relative paths are expanded using the current working directory,
            so within Galaxy it is best to send in an absolute path. The
            directory is created when it does not exist.
        force_download will cause a new download and extraction to occur, even
            if the destination has a file in it indicating that a previous
            download succeeded.

    Returns the tuple
        (downloaded_directory, download_has_source_data,
         genome_build_directory, lib_was_downloaded)
        downloaded_directory
            The subdirectory of the destination that holds the extracted data.
        download_has_source_data
            True when the second dot-separated part of the source filename is
            "source_data" (as opposed to e.g. "plug-n-play").
        genome_build_directory
            The directory where the genome resource library is or where it
            should be built. If the downloaded directory has exactly one
            entry, it is that entry; otherwise it is the _CTAT_BuildDir_Name
            subdirectory of the downloaded directory.
        lib_was_downloaded
            Whether a download actually occurred during this call.

    Raises
        ValueError if the destination exists but is not a directory, or if the
            extracted directory cannot be found afterwards.
        OSError if the destination cannot be created or has too little space.
        IOError if the destination cannot be written into.
        subprocess.CalledProcessError if the download/extract command fails.
    """
    # shlex.quote is the Python 3 name, pipes.quote the Python 2 one.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    lib_was_downloaded = False

    # Get the root filename of the Genome Directory.
    src_filename = source.split("/")[-1]
    name_parts = src_filename.split(".")
    root_genome_dirname = name_parts[0]
    # If the src_filename indicates it is a source file, as opposed to
    # plug-n-play, then we may need to do some post processing on it.
    # A filename without any "." has no type part; treat it as not source_data
    # instead of failing with an IndexError.
    download_has_source_data = (len(name_parts) > 1) and (name_parts[1] == "source_data")

    # We want to make sure that destination is absolute fully specified path.
    cannonical_destination = os.path.realpath(destination)
    if os.path.exists(cannonical_destination):
        if not os.path.isdir(cannonical_destination):
            raise ValueError("The destination is not a directory: " + \
                             "{:s}".format(cannonical_destination))
        # else all is good. It is a directory.
    else:
        # We need to create it.
        try:
            os.makedirs(cannonical_destination)
        except os.error:
            print("ERROR: Trying to create the following directory path:")
            print("\t{:s}".format(cannonical_destination))
            raise

    # Make sure the directory now exists and we can write to it.
    if not os.path.exists(cannonical_destination):
        # It should have been created, but if it doesn't exist at this point
        # in the code, something is wrong. Raise an error.
        raise OSError("The destination directory could not be created: " + \
                      "{:s}".format(cannonical_destination))
    test_writing_file = "{:s}/{:s}".format(cannonical_destination, _Download_TestFile)
    try:
        with open(test_writing_file, "w") as filehandle:
            filehandle.write("Testing writing to this file.")
        os.remove(test_writing_file)
    except IOError:
        print("The destination directory could not be written into: " + \
              "{:s}".format(cannonical_destination))
        raise

    # Get the list of files in the directory,
    # We use it to check for a previous download or extraction among other things.
    orig_files_in_destdir = set(os.listdir(cannonical_destination))
    # See whether the file has been downloaded already.
    download_success_file_path = "{:s}/{:s}".format(cannonical_destination, _DownloadSuccessFile)
    if ((_DownloadSuccessFile not in orig_files_in_destdir)
            or (root_genome_dirname not in orig_files_in_destdir)
            or force_download):
        # Check whether there is enough space on the device for the library.
        statvfs = os.statvfs(cannonical_destination)
        # f_bavail counts the free blocks that ordinary users are allowed to
        # use (excl. reserved space).
        num_avail_bytes = statvfs.f_frsize * statvfs.f_bavail
        if num_avail_bytes < _NumBytesNeededForBuild:
            raise OSError("There is insufficient space ({:s} bytes)".format(str(num_avail_bytes)) + \
                          " on the device of the destination directory: " + \
                          "{:s}".format(cannonical_destination))

        if _DownloadSuccessFile in orig_files_in_destdir:
            # Since we are redoing the download, the success file needs to be
            # removed until the download has succeeded.
            os.remove(download_success_file_path)
        # We want to transfer and untar the file without storing the tar file,
        # because that adds all that much more space to the needed amount of
        # free space on the disk, so the output of curl is piped into tar.
        # Both values are quoted so that spaces or shell metacharacters in the
        # URL or the path cannot change the meaning of the command.
        command = "curl {:s} | tar -xzvf - -C {:s}".format(
            shell_quote(source), shell_quote(cannonical_destination))
        try:
            # check_output is used so tar's verbose file list is captured
            # rather than written to our stdout; the output itself is unused.
            subprocess.check_output(command, shell=True)
        except subprocess.CalledProcessError:
            print("ERROR: Trying to run the following command:\n\t{:s}".format(command))
            raise
        else:
            lib_was_downloaded = True

        # Some code to help us if errors occur. subprocess.call is used so a
        # failing listing (e.g. a glob that matches nothing) cannot abort a
        # download that succeeded.
        print("\n*******************************\nFinished download and extraction.")
        subprocess.call("ls -lad {:s}/*".format(shell_quote(cannonical_destination)), shell=True)
        subprocess.call("ls -lad {:s}/*/*".format(shell_quote(cannonical_destination)), shell=True)

        newfiles_in_destdir = set(os.listdir(cannonical_destination)) - orig_files_in_destdir
        if root_genome_dirname not in newfiles_in_destdir:
            # Perhaps it has a different name than what we expected it to be.
            # It will be the file that was not in the directory
            # before we did the download and extraction.
            found_filename = None
            if len(newfiles_in_destdir) == 1:
                # A set cannot be indexed; take its only element.
                found_filename = next(iter(newfiles_in_destdir))
            else:
                for filename in newfiles_in_destdir:
                    # In most cases, there will only be one new file, but some
                    # OS's might have created other files in the directory.
                    # The correct file's name should be a substring of the
                    # name of the tar file that was downloaded.
                    if filename in src_filename:
                        found_filename = filename
            if found_filename is not None:
                root_genome_dirname = found_filename

    downloaded_directory = "{:s}/{:s}".format(cannonical_destination, root_genome_dirname)

    if os.path.exists(downloaded_directory):
        try:
            # Create a file to indicate that the download succeeded (the
            # equivalent of "touch"), done in-process so that a failure really
            # is reported through the except clause below.
            with open(download_success_file_path, "a"):
                os.utime(download_success_file_path, None)
        except (IOError, OSError):
            print("The download_success file could not be created: " + \
                  "{:s}".format(download_success_file_path))
            raise
        # Look for the build directory, or specify the path where it should be placed.
        downloaded_entries = os.listdir(downloaded_directory)
        if len(downloaded_entries) == 1:
            # Then that one entry is taken to be the build directory.
            genome_build_directory = "{:s}/{:s}".format(downloaded_directory, downloaded_entries[0])
        else:
            genome_build_directory = "{:s}/{:s}".format(downloaded_directory, _CTAT_BuildDir_Name)
    else:
        raise ValueError("ERROR: Could not find the extracted file in the destination directory:" + \
                         "\n\t{:s}".format(cannonical_destination))

    return (downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded)
|
|
269
|
|
def gmap_the_library(genome_build_directory):
    """Run gmap_build on the ref_genome.fa inside genome_build_directory.

    This is the processing that needs to happen for gmap-fusion to work.
    genome_build_directory should normally be a fully specified path,
    though it should work if it is relative.

    Whether or not the command succeeds, a listing of the build directory,
    of its entries and of the entries of its subdirectories is printed
    afterwards to help when errors occur.

    Raises subprocess.CalledProcessError if the gmap_build command fails.
    """
    # shlex.quote is the Python 3 name, pipes.quote the Python 2 one.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    # The paths are quoted so a directory name containing spaces or shell
    # metacharacters is passed to gmap_build intact.
    command = "gmap_build -D {:s} -d ref_genome.fa.gmap -k 13 {:s}".format(
        shell_quote(genome_build_directory + "/"),
        shell_quote(genome_build_directory + "/ref_genome.fa"))
    try:  # to send the gmap_build command.
        subprocess.check_output(command, shell=True)
    except subprocess.CalledProcessError:
        print("ERROR: While trying to run the gmap_build command on the library:\n\t{:s}".format(command))
        raise
    finally:
        # Some code to help us if errors occur. The listings use
        # subprocess.call and isdir guards, because an exception raised in
        # this finally block would replace the gmap_build error (or turn a
        # successful build into a failure).
        print("\n*******************************\nAfter running gmap_build.")
        if os.path.exists(genome_build_directory):
            print("\nBuild Directory {:s}:".format(genome_build_directory))
            subprocess.call(["ls", "-la", genome_build_directory])
            if os.path.isdir(genome_build_directory):
                for entry in os.listdir(genome_build_directory):
                    entry_path = "{:s}/{:s}".format(genome_build_directory, entry)
                    print("\nDirectory {:s}:".format(entry_path))
                    subprocess.call(["ls", "-la", entry_path])
                    if os.path.isdir(entry_path):
                        for subdir_entry in os.listdir(entry_path):
                            subdir_entry_path = "{:s}/{:s}".format(entry_path, subdir_entry)
                            print("\nDirectory {:s}:".format(subdir_entry_path))
                            subprocess.call(["ls", "-la", subdir_entry_path])
        else:
            print("Genome Build Directory does not exist:\n\t{:s}".format(genome_build_directory))
        print("*******************************")
|
|
301
|
|
def _print_directory_listing(kind, directory, include_subdirectories):
    """Print "ls -la" listings of directory and its entries, for debugging.

    kind is the word used in the headings ("Source" or "Build"). When
    include_subdirectories is true the entries of each subdirectory are listed
    as well. Problems while listing are reported but never raised, so these
    diagnostics cannot hide the error they are meant to explain.
    """
    if not os.path.exists(directory):
        print("Genome {:s} Directory does not exist:\n\t{:s}".format(kind, directory))
        return
    print("\n{:s} Directory {:s}:".format(kind, directory))
    subprocess.call(["ls", "-la", directory])
    try:
        for entry in os.listdir(directory):
            entry_path = "{:s}/{:s}".format(directory, entry)
            print("\nDirectory {:s}:".format(entry_path))
            subprocess.call(["ls", "-la", entry_path])
            if include_subdirectories and os.path.isdir(entry_path):
                for subdir_entry in os.listdir(entry_path):
                    subdir_entry_path = "{:s}/{:s}".format(entry_path, subdir_entry)
                    print("\nDirectory {:s}:".format(subdir_entry_path))
                    subprocess.call(["ls", "-la", subdir_entry_path])
    except OSError as list_error:
        print("Could not list {:s}: {:s}".format(directory, str(list_error)))


def build_the_library(genome_source_directory, genome_build_directory, build, gmap_build):
    """Build the genome resource library and/or run gmap_build on it.

    genome_source_directory is the location of the source_data needed to build
        the library. Normally it is fully specified, but could be relative.
    genome_build_directory is the location where the library will be built.
        Note that prep_genome_lib.pl is run after changing the working
        directory to genome_source_directory, so a relative value is resolved
        by that command from there.
    build specifies whether to run prep_genome_lib.pl even if it was run before.
    gmap_build specifies whether to run gmap_build or not.

    When a source directory is given and build is true, prep_genome_lib.pl is
    run in the source directory (with --gmap_build added when gmap_build is
    true). Otherwise, when gmap_build is true, only gmap_the_library() is
    called on genome_build_directory. Otherwise nothing is done.

    Raises ValueError when a build is requested but the source directory is
    missing, and subprocess.CalledProcessError when prep_genome_lib.pl fails.

    Following was the old way to do it, before FusionFilter 0.5.0:
        prep_genome_lib.pl --genome_fa ref_genome.fa --gtf ref_annot.gtf
            --blast_pairs blast_pairs.gene_syms.outfmt6.gz
            --fusion_annot_lib fusion_lib.dat.gz
            --output_dir ctat_genome_lib_build_dir
        index_pfam_domain_info.pl --pfam_domains PFAM.domtblout.dat.gz
            --genome_lib_dir ctat_genome_lib_build_dir
        gmap_build -D ctat_genome_lib_build_dir -d ref_genome.fa.gmap -k 13
            ctat_genome_lib_build_dir/ref_genome.fa
    """
    # shlex.quote is the Python 3 name, pipes.quote the Python 2 one.
    try:
        from shlex import quote as shell_quote
    except ImportError:
        from pipes import quote as shell_quote

    if (genome_source_directory != "") and build:
        # None (no download took place) is reported like any other missing
        # directory instead of failing inside os.path.exists().
        if (genome_source_directory is None) or (not os.path.exists(genome_source_directory)):
            raise ValueError("Cannot build the CTAT Genome Resource Library. " + \
                "The source directory does not exist:\n\t{:s}".format(str(genome_source_directory)))
        # Remember the absolute location before changing directory, so the
        # listing below still finds a relative source directory.
        source_listing_path = os.path.abspath(genome_source_directory)
        os.chdir(genome_source_directory)
        # FIX - look for a fusion_annot_lib and include it, else omit it.
        command = "prep_genome_lib.pl --genome_fa ref_genome.fa --gtf ref_annot.gtf " + \
                  "--fusion_annot_lib CTAT_HumanFusionLib.v0.1.0.dat.gz " + \
                  "--annot_filter_rule AnnotFilterRule.pm " + \
                  "--pfam_db PFAM.domtblout.dat.gz " + \
                  "--output_dir {:s} ".format(shell_quote(genome_build_directory))
        if gmap_build:
            command += "--gmap_build "
        try:  # to send the prep_genome_lib command.
            subprocess.check_call(command, shell=True)
        except subprocess.CalledProcessError:
            print("ERROR: While trying to run the prep_genome_lib.pl command " + \
                  "on the CTAT Genome Resource Library:\n\t{:s}".format(command))
            raise
        finally:
            # Some code to help us if errors occur.
            print("*******************************")
            # The source listing is decided by the existence of the source
            # directory (it used to test the build directory by mistake).
            _print_directory_listing("Source", source_listing_path, False)
            _print_directory_listing("Build", genome_build_directory, True)
            print("*******************************")
    elif gmap_build:
        gmap_the_library(genome_build_directory)
|
|
374
|
|
def main():
    """Command line entry point of the data manager tool.

    Parses the arguments, optionally downloads a library (when --source_url is
    given), optionally builds and/or gmaps it, and writes a json dictionary
    with one 'ctat_genome_resource_libs' data table entry (value, name, path)
    to --output_filename.

    Raises ValueError when no --source_url is given and --destination_path
    does not exist; errors of the download and build steps propagate.
    """
    # Parse Command Line
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--source_url', default="", \
        help='This is the url of a file with the data. They come from https://data.broadinstitute.org/Trinity/CTAT_RESOURCE_LIB/.')
    parser.add_argument('-n', '--display_name', default="", \
        help='Is used as the display name for the entry of this Genome Resource Library in the data table.')
    parser.add_argument('-p', '--destination_path', \
        help='Full path of the CTAT Resource Library location or destination, either where it is, or where it will be placed.')
    parser.add_argument('-o', '--output_filename', \
        help='Name of the output file, where the json dictionary will be written.')
    parser.add_argument('-f', '--force_download',
        help='Forces download of the Genome Resource Library, even if previously downloaded.', action="store_true")
    parser.add_argument('-b', '--build',
        help='Forces build/rebuild the Genome Resource Library, even if previously built. ' + \
             'Must have downloaded source_data for this to work.', action="store_true")
    parser.add_argument('-m', '--gmap_build',
        help='Must be selected if you want the library to be gmapped. ' + \
             'Will force gmap_build of the Genome Resource Library, even if previously gmapped.', action="store_true")
    args = parser.parse_args()

    # Both paths are needed further down. Complain now rather than failing
    # with a TypeError after a long download or build has already been done.
    if args.destination_path is None:
        parser.error("--destination_path is required")
    if args.output_filename is None:
        parser.error("--output_filename is required")

    # All of the input parameters are written by default to the output file
    # prior to this program being called, but the values are taken from the
    # command line rather than from that json file.

    # FIX - not sure the lib_was_downloaded actually serves a purpose...
    lib_was_downloaded = False
    download_has_source_data = False
    # If we do not download the directory, the destination_path should be the
    # location of the genome resource library.
    downloaded_directory = None
    # FIX - look inside of the args.destination_path to see if the build directory is inside it or is it.
    genome_build_directory = None
    # FIX - need to make sure we are handling all "possible" combinations of arguments.
    # Probably would be good if we could simplify/remove some of them.
    if args.source_url != "":
        downloaded_directory, download_has_source_data, genome_build_directory, lib_was_downloaded = \
            download_from_BroadInst(source=args.source_url, \
                                    destination=args.destination_path, \
                                    force_download=args.force_download)
    else:
        genome_build_directory = args.destination_path
        if not os.path.exists(genome_build_directory):
            raise ValueError("Cannot find the CTAT Genome Resource Library. " + \
                "The directory does not exist:\n\t{:s}".format(genome_build_directory))
        # else:
        # FIX - Check if there is an actual CTAT Genome Resource Lib there.
        # _CTAT_BuildDir_Name

    print("\nThe location of the CTAT Genome Resource Library is {:s}.\n".format(genome_build_directory))

    # FIX - We should leave a file indicating build success the same way we do for download success.
    # build_the_library() itself falls back to gmap only when no build is
    # done, so no separate gmap branch is needed here (the former elif could
    # never be reached).
    if download_has_source_data or args.build or args.gmap_build:
        build_the_library(downloaded_directory, genome_build_directory, args.build, args.gmap_build)

    # Get the name out of the source's filename. It stays empty when there is
    # no source url; it used to be left undefined in that case, which made the
    # checks below fail with a NameError.
    source_filename_root = ""
    if args.source_url:
        source_filename_root = args.source_url.split("/")[-1].split(".")[0]

    # Determine the display_name for the library.
    if not args.display_name:
        if source_filename_root:
            # Get the name out of the source filename.
            display_name = _CTAT_ResourceLib_DisplayNamePrefix + source_filename_root
        else:
            display_name = _CTAT_ResourceLib_DisplayNamePrefix + _CTAT_ResourceLib_DefaultGenome
            print("WARNING: We do not have a genome name. Using a default name, that might not be correct.")
    else:
        display_name = _CTAT_ResourceLib_DisplayNamePrefix + args.display_name
    display_name = display_name.replace(" ", "_")
    print("The Genome Name will be set to: {:s}\n".format(display_name))

    # Create a unique_id for the library. datetime.now() makes it unique.
    datetime_stamp = datetime.now().strftime("_%Y_%m_%d_%H_%M_%S_%f")
    if source_filename_root:
        unique_id = source_filename_root + datetime_stamp
    elif downloaded_directory:
        # The stamp is appended here too, otherwise this id would not be unique.
        unique_id = os.path.basename(downloaded_directory).split(".")[0] + datetime_stamp
    else:
        unique_id = _CTAT_ResourceLib_DefaultGenome + datetime_stamp

    print("The Resource Lib's display_name will be set to: {:s}\n".format(display_name))
    print("Its unique_id will be set to: {:s}\n".format(unique_id))
    print("Its dir_path will be set to: {:s}\n".format(genome_build_directory))

    data_table_entry = dict(value=unique_id, name=display_name, path=genome_build_directory)
    data_manager_dict = {'data_tables': {'ctat_genome_resource_libs': [data_table_entry]}}

    # Temporarily the output file's dictionary is written for debugging:
    print("The dictionary for the output file is:\n\t{:s}".format(str(data_manager_dict)))
    # Save info to json file. This is used to transfer data from the
    # DataManager tool, to the data manager, which then puts it into the
    # correct .loc file (I think).
    # Comment out the following lines when testing without galaxy package.
    with open(args.output_filename, 'wb') as output_file:
        output_file.write(to_json_string(data_manager_dict))
|
|
479
|
|
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|