Mercurial > repos > dchristiany > frogs_data_manager
comparison data_manager/FROGS_data_manager.py @ 0:7caea40b2a30 draft
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
author | dchristiany |
---|---|
date | Wed, 02 Oct 2019 11:00:08 -0400 |
parents | |
children | 95d5191f1dc4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7caea40b2a30 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 from galaxy.util.json import from_json_string, to_json_string | |
3 import os, sys, argparse, time, json, requests, urllib | |
4 | |
def get_args():
    """Parse the command-line options of the data manager.

    Returns:
        argparse.Namespace: namespace with the attributes ``database``,
        ``resource`` and ``output``. ``database`` and ``resource`` are
        ``None`` when the option is not given.

    Raises:
        SystemExit: raised by argparse when ``--output`` is missing or an
        unknown option is passed.
    """
    parser = argparse.ArgumentParser(
        description="Galaxy data manager for the FROGS reference data."
    )
    parser.add_argument(
        "-d", "--database",
        help='data set to install: "frogs_db_data", "HVL_db_data" or "phiX_db_data"',
    )
    parser.add_argument(
        "-r", "--resource",
        help="resource handed to the HVL / phiX handlers",
    )
    # main() opens this file unconditionally, so fail early with a clear
    # argparse message instead of a TypeError from open(None).
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="JSON parameter file of the data manager job",
    )
    return parser.parse_args()
12 | |
13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): | |
14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) | |
15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) | |
16 data_manager_dict['data_tables'][data_table].append(data_table_entry) | |
17 return data_manager_dict | |
18 | |
def frogs_sources(data_manager_dict, target_directory):
    """Download FROGS databases and register them in the "frogs_db" data table.

    The database index (a tab-separated file) is fetched from the FROGS
    databank server. For each selected index row the linked ``.tar.gz``
    archive is downloaded and extracted into ``frogs_db_<YYYYMMDD>/``
    (relative to the current working directory), the extracted ``.fasta``
    file is located, and an entry with the keys ``name``, ``value`` and
    ``path`` is appended to ``data_manager_dict['data_tables']['frogs_db']``.

    Args:
        data_manager_dict: dict updated in place with the new entries.
        target_directory: directory joined in front of every recorded path.

    Raises:
        requests.HTTPError: if the index request returns an error status.
        OSError: if ``frogs_db_<YYYYMMDD>`` already exists.
        RuntimeError: if an archive does not add exactly one entry to the
            extraction directory, or that entry does not contain exactly
            one ``.fasta`` file.
    """
    # Local imports: the module-level import line does not provide tarfile,
    # and urllib.URLopener only exists on Python 2.
    import tarfile
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # get frogs database index
    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as session:
        response = session.get(frogs_db_index_link)
    response.raise_for_status()
    # Split the decoded text, not the raw bytes: bytes.split("\t") raises
    # TypeError on Python 3.
    index_lines = response.content.decode('utf-8').splitlines()
    # The first line is skipped (presumably a header row - confirm); blank
    # lines are ignored so they cannot trigger an IndexError below.
    rows = [line.split("\t") for line in index_lines[1:] if line.strip()]

    # get frogs dbs
    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
    os.mkdir(dir_name)
    archive = "tmp.tar.gz"

    # NOTE(review): only the first two index rows are processed; this looks
    # like a limit left over from testing - confirm before release.
    for row in rows[:2]:
        # NOTE(review): column 1 is used twice in the value; the last part
        # may have been meant to be another column - confirm against the TSV.
        value = row[1] + "_" + row[2] + "_" + row[1]
        name = value.replace("_", " ")
        link = row[4]

        # Snapshot the directory so the entry added by this archive can be
        # identified afterwards.
        already_present = set(os.listdir(dir_name))

        # download and unpack frogs db; always drop the temporary archive
        try:
            urlretrieve(link, archive)
            # NOTE: extractall() trusts the member paths stored in the
            # archive; only use it with archives from a trusted index.
            with tarfile.open(archive) as tar:
                tar.extractall(dir_name)
        finally:
            if os.path.exists(archive):
                os.remove(archive)

        # get fasta file path
        new_entries = sorted(set(os.listdir(dir_name)) - already_present)
        if len(new_entries) != 1:
            raise RuntimeError(
                "expected exactly one new entry in {0} after extracting {1}, found: {2}".format(
                    dir_name, link, new_entries
                )
            )
        new_db = os.path.join(dir_name, new_entries[0])
        fasta_files = sorted(
            entry for entry in os.listdir(new_db) if entry.endswith('.fasta')
        )
        if len(fasta_files) != 1:
            raise RuntimeError(
                "expected exactly one .fasta file in {0}, found: {1}".format(
                    new_db, fasta_files
                )
            )
        # NOTE(review): the archive is extracted relative to the working
        # directory while the recorded path is prefixed with
        # target_directory; presumably the data manager configuration moves
        # the directory there - confirm.
        path = os.path.join(target_directory, new_db, fasta_files[0])

        data_table_entry = dict(name=name, value=value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
62 | |
63 #def HVL_sources(resource): | |
64 | |
65 #def phiX_sources(resource): | |
66 | |
def main():
    """Run the data manager: read the job parameters, fetch data, write results.

    The file given with ``--output`` is first read as JSON to obtain
    ``output_data[0]['extra_files_path']`` (created if missing). After the
    selected source handler has filled the data manager dict, the same file
    is overwritten with that dict serialised as JSON.

    Raises:
        NotImplementedError: for "HVL_db_data" and "phiX_db_data", whose
            handlers exist only as commented-out stubs in this file.
        ValueError: if ``--database`` is missing or not a known data set.
    """
    # get args from command line
    args = get_args()

    # Extract json file params
    filename = args.output
    with open(filename) as params_file:
        params = json.loads(params_file.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    data_manager_dict = {}
    if args.database == "frogs_db_data":
        frogs_sources(data_manager_dict, target_directory)
    elif args.database in ("HVL_db_data", "phiX_db_data"):
        # HVL_sources / phiX_sources are not defined; calling them would
        # only produce a NameError.
        raise NotImplementedError(
            "database '{0}' is not supported yet".format(args.database)
        )
    else:
        raise ValueError("unknown database: {0!r}".format(args.database))

    # save info to json file; without this the collected data table entries
    # are lost when the process exits
    with open(filename, 'w') as output_file:
        output_file.write(json.dumps(data_manager_dict))
85 | |
# Run the data manager only when the file is executed as a script.
if __name__ == "__main__":
    main()