#!/usr/bin/env python
#
# Data manager for downloading Plant Tribes scaffolds data.
import argparse
import json
import os
import shutil
import sys
import tarfile
import urllib2
import zipfile


DEFAULT_DATA_TABLE_NAMES = ["plant_tribes_scaffolds"]


def add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
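    """Append data_table_entry to data_table_name in data_manager_dict and return the dict."""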
    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
    data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
    return data_manager_dict


def make_directory(dir):
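    """Create dir if it does not already exist."""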
    if not os.path.exists(dir):
        os.makedirs(dir)
    # Return the path so callers can assign it directly.
    return dir


def remove_directory(dir):
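    """Remove dir and its contents if it exists."""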
    if os.path.exists(dir):
        shutil.rmtree(dir)


def extract_archive(file_path, work_directory):
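    """Extract a tar or zip archive into work_directory; silently skip unrecognized file types."""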
    if tarfile.is_tarfile(file_path):
        fh = tarfile.open(file_path, 'r:*')
    elif zipfile.is_zipfile(file_path):
        fh = zipfile.ZipFile(file_path, 'r')
    else:
        return
    fh.extractall(work_directory)


def move_files(work_directory, target_directory):
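    """Move every file in work_directory into target_directory."""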
    # Move the files into the defined output directory.
    for filename in os.listdir(work_directory):
        shutil.move(os.path.join(work_directory, filename), target_directory)


def url_download(url, work_directory):
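    """Download url into work_directory in 1 KB chunks and return the local file path."""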
    file_path = os.path.join(work_directory, os.path.basename(url))
    src = None
    dst = None
    try:
        req = urllib2.Request(url)
        src = urllib2.urlopen(req)
        dst = open(file_path, 'wb')
        while True:
            chunk = src.read(2**10)
            if chunk:
                dst.write(chunk)
            else:
                break
    except Exception, e:
        print >>sys.stderr, str(e)
    finally:
        if src:
            src.close()
        if dst:
            dst.close()
    return file_path


def download(target_file_path, web_url, config_web_url, description, data_table_names=DEFAULT_DATA_TABLE_NAMES):
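    """
    Download and unpack the scaffolds archive and the default configuration
    archive into target_file_path, then return a data manager dict describing
    the resulting data table entry.
    """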
    data_manager_dict = {}
    data_table_entry = {}
    # Download the scaffolds data.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'scaffolds'))
    make_directory(work_directory)
    file_path = url_download(web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the scaffolds data files into the defined output directory.
    target_directory = make_directory(target_file_path)
    move_files(work_directory, target_directory)
    remove_directory(work_directory)
    # Populate the data_table_entry with the scaffolds data.
    for file_path in os.listdir(target_directory):
        full_path = os.path.abspath(os.path.join(target_directory, file_path))
        entry_name = "%s" % os.path.basename(file_path)
        data_table_entry['value'] = entry_name
        data_table_entry['name'] = entry_name
        data_table_entry['path'] = full_path
        data_table_entry['description'] = description
    # Download the default configuration files.
    work_directory = os.path.abspath(os.path.join(os.getcwd(), 'configs'))
    make_directory(work_directory)
    file_path = url_download(config_web_url, work_directory)
    extract_archive(file_path, work_directory)
    os.remove(file_path)
    # Move the default configuration files into the defined output directory.
    target_directory = make_directory(target_file_path)
    move_files(work_directory, target_directory)
    remove_directory(work_directory)
    # Populate the data_table_entry with the default configs path.
    for file_path in os.listdir(target_directory):
        full_path = os.path.abspath(os.path.join(target_directory, file_path))
        data_table_entry['config_path'] = full_path
    # Populate the data_manager_dict.
    for data_table_name in data_table_names:
        data_manager_dict = add_data_table_entry(data_manager_dict, data_table_name, data_table_entry)
    return data_manager_dict


parser = argparse.ArgumentParser()
parser.add_argument('--description', dest='description', default=None, help='Description')
parser.add_argument('--name', dest='name', help='Data table entry unique ID')
parser.add_argument('--out_file', dest='out_file', help='JSON output file')
parser.add_argument('--web_url', dest='web_url', help='URL for downloading scaffolds')
parser.add_argument('--config_web_url', dest='config_web_url', help='URL for downloading default configs')
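
# Example invocation (the script name and URLs are placeholders; out.json is
# the parameter file that Galaxy generates for the data manager run):
#   python plant_tribes_scaffolds_download.py --out_file out.json \
#       --web_url http://example.org/scaffolds.tar.gz \
#       --config_web_url http://example.org/configs.tar.gz \
#       --description 'PlantTribes scaffolds'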

args = parser.parse_args()

# Some magic happens with tools of type "manage_data" in that the output
# file contains some JSON data that allows us to define the target directory.
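# For example, the Galaxy-generated out_file typically holds something like
# (only the key read below is shown):
#   {"output_data": [{"extra_files_path": "/galaxy/.../dataset_123_files"}]}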
params = json.loads(open(args.out_file).read())
target_file_path = params['output_data'][0]['extra_files_path']

if args.description is None:
    description = ''
else:
    description = args.description.strip()

# Get the scaffolds data.
data_manager_dict = download(target_file_path, args.web_url, args.config_web_url, description)
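# The dict written below has roughly this shape, with one list entry per data
# table name in DEFAULT_DATA_TABLE_NAMES:
#   {"data_tables": {"plant_tribes_scaffolds": [{"value": ..., "name": ...,
#    "path": ..., "description": ..., "config_path": ...}]}}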
# Write the JSON output dataset.
fh = open(args.out_file, 'wb')
fh.write(json.dumps(data_manager_dict))
fh.close()