# HG changeset patch
# User yating-l
# Date 1493847837 14400
# Node ID 2f926e7d623dc1710d585e101d79aa9ad4a2052d
# Parent 464d75111b163d84ab189dc092c1fb8fb3ed06a0
planemo upload
diff -r 464d75111b16 -r 2f926e7d623d data_manager/fetch_reference_data.py
--- a/data_manager/fetch_reference_data.py Wed May 03 16:54:57 2017 -0400
+++ b/data_manager/fetch_reference_data.py Wed May 03 17:43:57 2017 -0400
@@ -26,30 +26,41 @@
genome_name = params['param_dict']['genome_name']
return genome_id, genome_name
-def download_from_GlimmerHMM(data_manager_dict, params, target_directory, sequence_id, sequence_name ):
- GlimmerHMM_DOWNLOAD_URL = 'ftp://ccb.jhu.edu/pub/software/glimmerhmm/GlimmerHMM-3.0.4.tar.gz'
- GlimmerHMM_TRAINED_DIR = os.path.join('GlimmerHMM', 'trained_dir', sequence_id)
- with tarfile.open('GlimmerHMM-3.0.4.tar', mode='r:*') as tar:
+def get_url(params):
+    """Return the trained-data URL stored at params['param_dict']['trained_url']."""
+    return params['param_dict']['trained_url']
+
+def download_from_GlimmerHMM(data_manager_dict, target_directory, sequence_id, sequence_name, trained_dir):
+    """Fetch GlimmerHMM trained data for one genome and register it.
+
+    trained_dir is the URL of the archive to download (despite its name);
+    when it is empty, the GlimmerHMM 3.0.4 release tarball is used. Archive
+    members whose name contains sequence_id are extracted into a scratch
+    directory, GlimmerHMM/trained_dir/<sequence_id> is copied to
+    <target_directory>/<sequence_id>, and a data table entry pointing at the
+    copy is appended to data_manager_dict.
+    """
+    if not trained_dir:
+        trained_dir = 'ftp://ccb.jhu.edu/pub/software/glimmerhmm/GlimmerHMM-3.0.4.tar.gz'
+    downloadpath = 'tmp'
+    # A previous failed run can leave the scratch directory behind; reuse it
+    # instead of letting os.mkdir raise.
+    if not os.path.isdir(downloadpath):
+        os.mkdir(downloadpath)
+    filepath = os.path.join(downloadpath, 'GlimmerHMM-3.0.4.tar')
+    # Stream the download to disk so the archive is never held in memory, and
+    # always release the connection.
+    response = urllib2.urlopen(trained_dir)
+    try:
+        with open(filepath, 'wb') as archive:
+            shutil.copyfileobj(response, archive)
+    finally:
+        response.close()
+    with tarfile.open(filepath, mode='r:*') as tar:
         subdir = [
             tarinfo for tarinfo in tar.getmembers()
             if sequence_id in tarinfo.name
         ]
-        tar.extractall(members=subdir)
+        # Extract under downloadpath: the copy below reads from there, and
+        # extracting into the working directory left that path missing.
+        # NOTE(review): member names come from a user-supplied URL and are not
+        # checked for '..' or absolute paths before extraction.
+        tar.extractall(path=downloadpath, members=subdir)
+    # Assumes the archive's top-level directory is 'GlimmerHMM' -- TODO confirm
+    # for custom URLs.
+    GlimmerHMM_TRAINED_DIR = os.path.join(downloadpath, 'GlimmerHMM', 'trained_dir', sequence_id)
     glimmerhmm_trained_target_dir = os.path.join(target_directory, sequence_id)
     shutil.copytree(GlimmerHMM_TRAINED_DIR, glimmerhmm_trained_target_dir)
     data_table_entry = dict(value=sequence_id, name=sequence_name, path=glimmerhmm_trained_target_dir)
     _add_data_table_entry(data_manager_dict, data_table_entry)
+
+    cleanup_before_exit(downloadpath)
- cleanup_before_exit(GlimmerHMM_TRAINED_DIR)
-
-def _add_data_table_entry( data_manager_dict, data_table_entry ):
-    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
-    data_manager_dict['data_tables']['reference_data'] = data_manager_dict['data_tables'].get('reference_data', [])
-    data_manager_dict['data_tables']['reference_data'].append( data_table_entry )
-    return data_manager_dict
+def _add_data_table_entry(data_manager_dict, data_table_entry):
+    """Append data_table_entry to the 'reference_data' data table, creating missing levels."""
+    tables = data_manager_dict.setdefault('data_tables', {})
+    tables.setdefault('reference_data', []).append(data_table_entry)
+    return data_manager_dict
-REFERENCE_SOURCE_TO_DOWNLOAD = dict(glimmerhmm=download_from_GlimmerHMM)
-
def main():
#Parse Command Line
parser = argparse.ArgumentParser()
@@ -64,10 +75,9 @@
data_manager_dict = {}
sequence_id, sequence_name = get_reference_id_name(params)
-
+ trained_dir = get_url(params)
#Fetch the FASTA
- REFERENCE_SOURCE_TO_DOWNLOAD[params['param_dict']['trained_dir']](data_manager_dict, params, target_directory, sequence_id, sequence_name)
-
+ download_from_GlimmerHMM(data_manager_dict, target_directory, sequence_id, sequence_name, trained_dir)
#save info to json file
open(filename, 'wb').write(to_json_string(data_manager_dict))
diff -r 464d75111b16 -r 2f926e7d623d data_manager/fetch_reference_data.xml
--- a/data_manager/fetch_reference_data.xml Wed May 03 16:54:57 2017 -0400
+++ b/data_manager/fetch_reference_data.xml Wed May 03 17:43:57 2017 -0400
@@ -4,7 +4,7 @@
-
+