0
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import sys
|
|
4 import logging
|
|
5 import gzip
|
|
6 import string
|
|
7 import os
|
|
8 import tarfile
|
|
9
|
|
10 from io import BytesIO
|
|
11 from collections import defaultdict
|
|
12
|
|
13 try:
|
|
14 from Bio.SeqRecord import SeqRecord
|
|
15 from Bio import SeqIO
|
|
16 except ImportError:
|
|
17 message = 'This script requires the BioPython python package\n'
|
|
18 sys.stderr.write(message)
|
|
19 sys.exit(1)
|
|
20
|
|
21 try:
|
|
22 from plumbum import cli
|
|
23 except ImportError:
|
|
24 message = 'This script requires the plumbum python package\n'
|
|
25 sys.stderr.write(message)
|
|
26 sys.exit(1)
|
|
27
|
|
28 try:
|
|
29 import ftputil
|
|
30 except ImportError:
|
|
31 message = 'This script requires the ftputil python package\n'
|
|
32 sys.stderr.write(message)
|
|
33 sys.exit(1)
|
|
34
|
|
35
|
|
36 NON_ID = ''.join(c for c in map(chr, range(256)) if not c.isalnum())
|
|
37 NON_ID = NON_ID.replace('_', '').replace('-', '')
|
|
38 NUC_TRANS = string.maketrans('uryswkmbdhvURYSWKMBDHV', 'tnnnnnnnnnnTNNNNNNNNNN')
|
|
39
|
|
40 logging.basicConfig(level=logging.INFO, format='%(message)s')
|
|
41
|
|
42
|
|
43 class DataDownloader(object):
|
|
44 """ Generic class for downloading fasta records via FTP. """
|
|
45
|
|
46 def __init__(self, host_name, base_path, user='anonymous', password='anonymous'):
|
|
47
|
|
48 self.group = 'NoGroup'
|
|
49 self.host_name = host_name
|
|
50 self.base_path = base_path
|
|
51
|
|
52 self.host = ftputil.FTPHost(host_name, user, password)
|
|
53 self.host.chdir(base_path)
|
|
54
|
|
55 @classmethod
|
|
56 def _get_fasta_record(self, record, group, family, organism, identifier, sub_identifier=None, description=''):
|
|
57
|
|
58 record.seq._data = record.seq._data.translate(NUC_TRANS)
|
|
59
|
|
60 group = group.translate(None, NON_ID)
|
|
61 family = family.translate(None, NON_ID)
|
|
62 organism = organism.translate(None, NON_ID)
|
|
63 identifier = identifier.translate(None, NON_ID)
|
|
64
|
|
65 if sub_identifier:
|
|
66 sub_identifier = sub_identifier.translate(None, NON_ID)
|
|
67 record.id = ';'.join([group, family, organism, identifier, sub_identifier])
|
|
68 else:
|
|
69 record.id = ';'.join([group, family, organism, identifier])
|
|
70
|
|
71 record.description = description
|
|
72 record.name = ''
|
|
73
|
|
74 return record
|
|
75
|
|
76 def get_fasta_records(self):
|
|
77
|
|
78 raise NotImplementedError
|
|
79
|
|
80 class SilvaDownlaoder(DataDownloader):
|
|
81 """ Downlaods rRNA transcripts from the Silva database. """
|
|
82
|
|
83 def __init__(self, silva_release='111'):
|
|
84
|
|
85 self.silva_release = silva_release
|
|
86 self.silva_host_name = 'ftp.arb-silva.de'
|
|
87 self.base_path = '/release_%s/Exports/' % self.silva_release
|
|
88
|
|
89 super(SilvaDownlaoder, self).__init__(self.silva_host_name, self.base_path)
|
|
90
|
|
91 self.group = 'rRNA'
|
|
92
|
|
93 def get_fasta_records(self):
|
|
94
|
|
95 remote_path = self.base_path + 'SSURef_%s_NR_tax_silva_trunc.fasta.tgz' % self.silva_release
|
|
96 if self.host.path.isfile(remote_path):
|
|
97
|
|
98 with self.host.file(remote_path, 'rb') as remote_handle:
|
|
99 remote_content = BytesIO(remote_handle.read())
|
|
100
|
|
101 with tarfile.open(fileobj=remote_content) as tar:
|
|
102
|
|
103 for subentry in tar.getnames():
|
|
104 if subentry.endswith('.fasta'):
|
|
105 logging.debug('Processing rRNA file %s' % subentry)
|
|
106 subhandle = tar.extractfile(subentry)
|
|
107
|
|
108 for record in SeqIO.parse(subhandle, "fasta"):
|
|
109 identifier = record.id.split('.')[0]
|
|
110 description = '_'.join(record.description.split(' ')[1:])
|
|
111 taxonomy = description.split(';')
|
|
112 organism = taxonomy[-1]
|
|
113 family = 'Unassigned'
|
|
114
|
|
115 for level in taxonomy:
|
|
116 if level.endswith('eae'):
|
|
117 family = level
|
|
118 break
|
|
119
|
|
120 yield self._get_fasta_record(record, self.group, family, organism, identifier)
|
|
121
|
|
122 class HumanTranscriptDownloader(DataDownloader):
|
|
123 """ Downloads human transcripts fasta records that contain the locations
|
|
124 of their exons on the human genome within their description line.
|
|
125 """
|
|
126
|
|
127 def __init__(self):
|
|
128
|
|
129 self.ensemble_host_name = 'ftp.ensembl.org'
|
|
130 self.gtf_path = '/pub/current_gtf/homo_sapiens'
|
|
131 self.cdna_path = '/pub/current_fasta/homo_sapiens/cdna'
|
|
132 self.ncrna_path = '/pub/current_fasta/homo_sapiens/ncrna'
|
|
133
|
|
134 super(HumanTranscriptDownloader, self).__init__(self.ensemble_host_name, self.gtf_path)
|
|
135
|
|
136 self.chromosomes = set()
|
|
137 self.genes = defaultdict(set)
|
|
138 self.regions = defaultdict(set)
|
|
139 self.group = 'Homo_sapiens_cDNA'
|
|
140
|
|
141 def _cache_annotations(self):
|
|
142 """ Obtains gtf annotations for human transcripts from ensemble and
|
|
143 caches them in memory.
|
|
144 """
|
|
145
|
|
146 self.host.chdir(self.gtf_path)
|
|
147
|
|
148 entries = self.host.listdir(self.host.curdir)
|
|
149
|
|
150 for entry in entries:
|
|
151
|
|
152 if self.host.path.isfile(entry):
|
|
153 if entry.startswith('Homo_sapiens.') and entry.endswith('.gtf.gz'):
|
|
154 logging.debug('Processing cDNA annotations %s' % entry)
|
|
155
|
|
156 with self.host.file(self.gtf_path + '/' + entry, 'rb') as zipped_handle:
|
|
157 remote_content = BytesIO(zipped_handle.read())
|
|
158
|
|
159 with gzip.GzipFile(fileobj=remote_content) as gzipfile:
|
|
160 for i, gtf_line in enumerate(gzipfile):
|
|
161 if i % 100000 == 0:
|
|
162 logging.debug('Cached %i human transcript annotations' % i)
|
|
163
|
|
164 self._add_annotation(gtf_line)
|
|
165
|
|
166 def _get_raw_transcripts(self):
|
|
167 """ Obtains coding and noncoding human transcripts from ensemble and
|
|
168 yields them as fasta records if they have exons that have a known location
|
|
169 on the human genome. Fasta records contain the exon locations in their header
|
|
170 lines.
|
|
171 """
|
|
172
|
|
173 for subpath in [self.cdna_path, self.ncrna_path]:
|
|
174
|
|
175 self.host.chdir(subpath)
|
|
176
|
|
177 entries = self.host.listdir(self.host.curdir)
|
|
178
|
|
179 for entry in entries:
|
|
180 if self.host.path.isfile(entry):
|
|
181 if entry.endswith('cdna.all.fa.gz') or entry.endswith('ncrna.fa.gz'):
|
|
182
|
|
183 logging.debug('Processing human transcript file %s' % entry)
|
|
184
|
|
185 with self.host.file(subpath + '/' + entry, 'rb') as zipped_handle:
|
|
186 remote_content = BytesIO(zipped_handle.read())
|
|
187
|
|
188 with gzip.GzipFile(fileobj=remote_content) as gzipfile:
|
|
189 for i, record in enumerate(SeqIO.parse(gzipfile, "fasta")):
|
|
190
|
|
191 if record.id.startswith('ENST'):
|
|
192
|
|
193 if i % 100000 == 0:
|
|
194 logging.debug('Retrieved %i human transcripts' % i)
|
|
195
|
|
196 record.description = ''
|
|
197
|
|
198 yield record
|
|
199
|
|
200 def _add_annotation(self, gtf_line):
|
|
201 """ Parses gtf annotations and stores them in memory. """
|
|
202
|
|
203 if gtf_line[0] == '#':
|
|
204 return
|
|
205
|
|
206 fields = gtf_line.strip().split('\t')
|
|
207 chromosome, source, feature, start, end, score, strands, frame, attributes\
|
|
208 = fields
|
|
209
|
|
210 start, end = sorted([int(start), int(end)])
|
|
211
|
|
212 if feature != 'exon':
|
|
213 return
|
|
214
|
|
215 attributes = attributes.replace(' ', '').split(';')
|
|
216 transcript_ids = [identifier.split('"')[1] for identifier in attributes\
|
|
217 if identifier.startswith('transcript_id')]
|
|
218
|
|
219 gene_names = [identifier.split('"')[1] for identifier in attributes\
|
|
220 if identifier.startswith('gene_name')]
|
|
221
|
|
222 for transcript_id in transcript_ids:
|
|
223 self.genes[transcript_id] = self.genes[transcript_id].union(gene_names)
|
|
224 self.regions[transcript_id].add((chromosome, start, end))
|
|
225
|
|
226 def get_fasta_records(self):
|
|
227 """ Yields annotated fasta records of human transcripts. """
|
|
228
|
|
229 logging.debug('Caching annotations')
|
|
230 self._cache_annotations()
|
|
231
|
|
232 logging.debug('Downloading and annotating transcripts')
|
|
233 for record in self._get_raw_transcripts():
|
|
234 transcript_id = record.id
|
|
235
|
|
236 if transcript_id in self.regions:
|
|
237
|
|
238 mapping_locations = self.regions[transcript_id]
|
|
239
|
|
240 description = '|'.join([';'.join(map(str, location))\
|
|
241 for location in mapping_locations])
|
|
242
|
|
243 # Obtain gene name for transcript
|
|
244 gene_names = self.genes[transcript_id]
|
|
245 if gene_names:
|
|
246 gene_name = list(gene_names)[0]
|
|
247 else:
|
|
248 gene_name = transcript_id
|
|
249
|
|
250 yield self._get_fasta_record(record, self.group , 'Hominidae', 'Homo_sapiens', gene_name, sub_identifier=transcript_id, description=description)
|
|
251
|
|
252 class RefSeqDownloader(DataDownloader):
|
|
253 """ Generic downloader that known how to interpret and parse genbank records """
|
|
254
|
|
255 def __init__(self, group):
|
|
256
|
|
257 self.refseq_host_name = 'ftp.ncbi.nih.gov'
|
|
258 self.base_path = '/genomes/' + group
|
|
259
|
|
260 super(RefSeqDownloader, self).__init__(self.refseq_host_name, self.base_path)
|
|
261
|
|
262 self.group = group
|
|
263
|
|
264
|
|
265 @classmethod
|
|
266 def _get_project_from_genbank(self, gb_record):
|
|
267 """ Parses genbank project identifier from genbank record. """
|
|
268
|
|
269 project = ''
|
|
270 for dbxref in gb_record.dbxrefs:
|
|
271 for entry in dbxref.split(' '):
|
|
272 if entry.startswith('BioProject'):
|
|
273 project = entry.split(':')[1]
|
|
274 return project
|
|
275
|
|
276 if not project:
|
|
277 for dbxref in gb_record.dbxrefs:
|
|
278 for entry in dbxref.split(' '):
|
|
279 if entry.startswith('Project'):
|
|
280 project = entry.split(':')[1]
|
|
281 return project
|
|
282
|
|
283 return project
|
|
284
|
|
285 @classmethod
|
|
286 def _get_annotations_from_genbank(self, gb_record):
|
|
287 """ Parses taxonomic annotation from genbank record. """
|
|
288
|
|
289 accession = gb_record.annotations['accessions'][0]
|
|
290 organism = gb_record.annotations['organism'].replace(' ', '_')
|
|
291 organism = '_'.join(organism.split('_')[:2])
|
|
292 taxonomy = ("; ").join(gb_record.annotations['taxonomy'])
|
|
293 gi = gb_record.annotations['gi']
|
|
294
|
|
295 family = 'Unassigned'
|
|
296 if len(gb_record.annotations['taxonomy']) > 0:
|
|
297 for level in gb_record.annotations['taxonomy']:
|
|
298 if level.endswith('dae') or level.endswith('aceae'):
|
|
299 family = level
|
|
300 break
|
|
301
|
|
302 if family == 'Unassigned':
|
|
303 logging.debug('Unassigned family found based on %s' % taxonomy)
|
|
304
|
|
305 return organism, accession, gi, taxonomy, family
|
|
306
|
|
307 @classmethod
|
|
308 def _get_record_from_genbank(self, gb_record):
|
|
309 """ Parses sequence from genbank record. """
|
|
310
|
|
311 return SeqRecord(gb_record.seq)
|
|
312
|
|
313 @classmethod
|
|
314 def _parse_genbank_records(self, file_handle):
|
|
315
|
|
316 for gb_record in SeqIO.parse(file_handle, 'genbank'):
|
|
317
|
|
318 project = self._get_project_from_genbank(gb_record)
|
|
319 organism, accession, gi, taxonomy, family =\
|
|
320 self._get_annotations_from_genbank(gb_record)
|
|
321 record = self._get_record_from_genbank(gb_record)
|
|
322
|
|
323 if len(record) > 0:
|
|
324 yield record, project, taxonomy, family, organism, gi
|
|
325
|
|
326 def _get_nc(self, entry_path):
|
|
327
|
|
328 if self.host.path.isfile(entry_path):
|
|
329 logging.debug('Processing refseq entry %s' % entry_path)
|
|
330 with self.host.file(entry_path) as remote_handle:
|
|
331
|
|
332
|
|
333 for record, project, taxonomy, family, organism, gi\
|
|
334 in self._parse_genbank_records(remote_handle):
|
|
335
|
|
336 cleaned = self._get_fasta_record(record, self.group , family, organism, gi)
|
|
337
|
|
338 yield cleaned
|
|
339
|
|
340 def _get_nz(self, entry_path):
|
|
341
|
|
342 scaffold_path = entry_path.replace('.gbk', '.scaffold.gbk.tgz')
|
|
343 if self.host.path.isfile(scaffold_path):
|
|
344 logging.debug('Processing refseq entry %s' % scaffold_path)
|
|
345 with self.host.file(scaffold_path, 'rb') as remote_handle:
|
|
346 remote_content = BytesIO(remote_handle.read())
|
|
347 with tarfile.open(fileobj=remote_content) as tar:
|
|
348 for subentry in tar.getnames():
|
|
349 if subentry.endswith('.gbk'):
|
|
350 logging.debug('Processing subaccession %s' % subentry)
|
|
351 subhandle = tar.extractfile(subentry)
|
|
352 for record, project, taxonomy, family, organism, gi in self._parse_genbank_records(subhandle):
|
|
353 yield self._get_fasta_record(record, self.group , family, organism, gi)
|
|
354
|
|
355
|
|
356 def get_fasta_records(self):
|
|
357
|
|
358 species_dirs = self.host.listdir(self.host.curdir)
|
|
359
|
|
360 for species_dir in species_dirs:
|
|
361
|
|
362 species_path = self.base_path + '/' + species_dir
|
|
363
|
|
364 if self.host.path.isdir(species_path):
|
|
365
|
|
366 self.host.chdir(species_path)
|
|
367
|
|
368 logging.debug('Processing species directory %s' % species_path)
|
|
369
|
|
370 if self.host.path.isdir(species_path):
|
|
371
|
|
372 self.host.chdir(species_path)
|
|
373
|
|
374 entries = self.host.listdir(self.host.curdir)
|
|
375
|
|
376 for entry in entries:
|
|
377
|
|
378 if self.host.path.isfile(self.host.curdir + '/' + entry):
|
|
379
|
|
380 if entry.endswith('.gbk') or entry.endswith('.gbk.gz'):
|
|
381
|
|
382 logging.debug('Processing entry %s' % entry)
|
|
383
|
|
384 entry_path = species_path + '/' + entry
|
|
385
|
|
386 if entry.startswith('NC_'):
|
|
387 for record in self._get_nc(entry_path):
|
|
388 yield record
|
|
389
|
|
390 elif entry.startswith('NZ_'):
|
|
391 for record in self._get_nz(entry_path):
|
|
392 yield record
|
|
393
|
|
394 class HumanGenomeDownloader(DataDownloader):
|
|
395
|
|
396 def __init__(self):
|
|
397
|
|
398 self.ensemble_host_name = 'ftp.ensembl.org'
|
|
399 self.base_path = '/pub/current_fasta/homo_sapiens/dna'
|
|
400
|
|
401 super(HumanGenomeDownloader, self).__init__(self.ensemble_host_name, self.base_path)
|
|
402
|
|
403 self.group = 'Homo_sapiens'
|
|
404
|
|
405 def get_fasta_records(self):
|
|
406
|
|
407 entries = self.host.listdir(self.host.curdir)
|
|
408 for entry in entries:
|
|
409
|
|
410 if self.host.path.isfile(entry):
|
|
411
|
|
412 # You may want to change your filtering criteria here if you would like to include patches and haplotypes
|
|
413 if entry.endswith('fa.gz') and 'dna_sm.primary_assembly' in entry:
|
|
414
|
|
415 logging.debug('Processing human entry %s' % entry)
|
|
416
|
|
417 with self.host.file(self.host.getcwd() + '/' + entry, 'rb') as zipped_handle:
|
|
418 remote_content = BytesIO(zipped_handle.read())
|
|
419
|
|
420 with gzip.GzipFile(fileobj=remote_content) as gzipfile:
|
|
421
|
|
422 for record in SeqIO.parse(gzipfile, "fasta"):
|
|
423
|
|
424 identifier = record.id
|
|
425 organism = 'Homo_sapiens'
|
|
426 family = 'Hominidae'
|
|
427
|
|
428 yield self._get_fasta_record(record, self.group, family, organism, identifier)
|
|
429
|
|
430 class UniVecDownloader(DataDownloader):
|
|
431
|
|
432 def __init__(self):
|
|
433
|
|
434 self.univec_host_name = 'ftp.ncbi.nih.gov'
|
|
435 self.base_path = '/pub/UniVec'
|
|
436
|
|
437 super(UniVecDownloader, self).__init__(self.univec_host_name, self.base_path)
|
|
438
|
|
439 self.group = 'UniVec'
|
|
440
|
|
441 def get_fasta_records(self):
|
|
442
|
|
443 logging.debug('Processing Univec entries')
|
|
444
|
|
445 with self.host.file(self.host.getcwd() + '/' + 'UniVec') as remote_handle:
|
|
446 for record in SeqIO.parse(remote_handle, "fasta"):
|
|
447 organism = '_'.join(record.description.split(' ')[1:])
|
|
448 family = 'Unassigned'
|
|
449 identifier = '000000000'
|
|
450
|
|
451 yield self._get_fasta_record(record, self.group, family, organism, identifier)
|
|
452
|
|
453 class CLI(cli.Application):
|
|
454 """"""
|
|
455 PROGNAME = "metamap"
|
|
456 VERSION = "1.0.0"
|
|
457 DESCRIPTION = """"""
|
|
458
|
|
459 def main(self, *args):
|
|
460
|
|
461 if args:
|
|
462 print("Unknown command %r" % (args[0]))
|
|
463 print self.USAGE
|
|
464 return 1
|
|
465
|
|
466 if not self.nested_command:
|
|
467 print("No command given")
|
|
468 print self.USAGE
|
|
469 return 1
|
|
470
|
|
471 @CLI.subcommand("fasta")
|
|
472 class References(cli.Application):
|
|
473 """ Obtains NCBI RefSeq and Ensembl reference genomes and transcripts"""
|
|
474
|
|
475 valid_references = ['Fungi', 'Fungi_DRAFT', 'Bacteria',
|
|
476 'Bacteria_DRAFT', 'Homo_sapiens',
|
|
477 'Viruses', 'UniVec', 'Plasmids',
|
|
478 'Protozoa', 'rRNA', 'Homo_sapiens_cDNA']
|
|
479
|
|
480 references = cli.SwitchAttr(['-r', '--reference'],
|
|
481 cli.Set(*valid_references, case_sensitive=True),
|
|
482 list=True, default=valid_references,
|
|
483 mandatory=False,
|
|
484 help="Sets the kind of references to obtain")
|
|
485
|
|
486 fasta_path = cli.SwitchAttr(['-o', '--output_file'], str, mandatory=True,
|
|
487 help="Sets the fasta output file")
|
|
488
|
|
489 zipped = cli.Flag(["-z", "--zipped"], help="Write gzipped output")
|
|
490 debug = cli.Flag(["-d", "--debug"], help="Enable debug messages")
|
|
491
|
|
492
|
|
493 def main(self):
|
|
494 """ Downloads genome fasta files from reference databases"""
|
|
495
|
|
496 if self.debug:
|
|
497 logging.getLogger().setLevel(logging.DEBUG)
|
|
498
|
|
499 if os.path.dirname(self.fasta_path) and not os.path.exists(os.path.dirname(self.fasta_path)):
|
|
500 logging.debug('Making directories for output file %s' % self.fasta_path)
|
|
501 os.makedirs(os.path.dirname(self.fasta_path))
|
|
502
|
|
503 if self.zipped:
|
|
504 logging.debug('Making zipped output file %s' % self.fasta_path)
|
|
505 output_file = gzip.open(self.fasta_path, 'wb')
|
|
506
|
|
507 else:
|
|
508 logging.debug('Making regular output file %s' % self.fasta_path)
|
|
509 output_file = open(self.fasta_path, 'w', buffering=100 * 1024 * 1024)
|
|
510
|
|
511
|
|
512 for reference in self.references:
|
|
513
|
|
514 if reference == 'UniVec':
|
|
515 downloader = UniVecDownloader()
|
|
516 elif reference == 'Homo_sapiens':
|
|
517 downloader = HumanGenomeDownloader()
|
|
518 elif reference == 'rRNA':
|
|
519 downloader = SilvaDownlaoder()
|
|
520 elif reference == 'Homo_sapiens_cDNA':
|
|
521 downloader = HumanTranscriptDownloader()
|
|
522 else:
|
|
523 downloader = RefSeqDownloader(group=reference)
|
|
524
|
|
525 for record in downloader.get_fasta_records():
|
|
526 SeqIO.write([record], output_file, "fasta")
|
|
527
|
|
528 output_file.close()
|
|
529
|
|
530 @CLI.subcommand("blast")
|
|
531 class Blast(cli.Application):
|
|
532 """ Obtains blast databases """
|
|
533
|
|
534 valid_references = ['nt', 'nr']
|
|
535
|
|
536 references = cli.SwitchAttr(['-r', '--reference_database'],
|
|
537 cli.Set(*valid_references, case_sensitive=True),
|
|
538 list=True, default=valid_references,
|
|
539 mandatory=False,
|
|
540 help="Sets the kind of database to obtain")
|
|
541
|
|
542 database_path = cli.SwitchAttr(['-o', '--output_path'], str, mandatory=True,
|
|
543 help="Sets the fasta output file")
|
|
544
|
|
545 debug = cli.Flag(["-d", "--debug"], help="Enable debug messages")
|
|
546
|
|
547 def main(self):
|
|
548 """ Downloads blast database files """
|
|
549
|
|
550 if self.debug:
|
|
551 logging.getLogger().setLevel(logging.DEBUG)
|
|
552
|
|
553 if not os.path.exists(self.database_path):
|
|
554 logging.debug('Making output directory %s' % self.database_path)
|
|
555 os.makedirs(self.database_path)
|
|
556
|
|
557 host_name = 'ftp.ncbi.nih.gov'
|
|
558 host = ftputil.FTPHost(host_name, 'anonymous', 'anonymous')
|
|
559
|
|
560 for reference in self.references:
|
|
561
|
|
562 path = '/blast/db'
|
|
563 host.chdir(path)
|
|
564
|
|
565 entries = host.listdir(host.curdir)
|
|
566
|
|
567 for entry in entries:
|
|
568
|
|
569 if host.path.isfile(entry):
|
|
570
|
|
571 if entry.startswith(reference) and entry.endswith('.tar.gz'):
|
|
572 logging.debug('Downloading %s' % entry)
|
|
573 local_path = os.path.join(self.database_path, entry)
|
|
574 host.download(entry, local_path, 'b')
|
|
575 logging.debug('Unpacking %s' % entry)
|
|
576 tfile = tarfile.open(local_path, 'r:gz')
|
|
577 tfile.extractall(self.database_path)
|
|
578 os.remove(local_path)
|
|
579
|
|
580 if __name__ == "__main__":
|
|
581 CLI.run()
|
|
582
|