comparison data_manager/FROGS_data_manager.py @ 0:7caea40b2a30 draft

planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
author dchristiany
date Wed, 02 Oct 2019 11:00:08 -0400
parents
children 95d5191f1dc4
comparison
equal deleted inserted replaced
-1:000000000000 0:7caea40b2a30
1 # -*- coding: utf-8 -*-
2 from galaxy.util.json import from_json_string, to_json_string
3 import os, sys, argparse, time, json, requests, urllib
4
def get_args():
    """Parse the command-line options of the data manager.

    Returns:
        The ``argparse`` namespace with the attributes ``database``,
        ``resource`` and ``output``; an attribute is ``None`` when its
        option was not given on the command line.
    """
    cli = argparse.ArgumentParser()
    # Every option is a plain string option with a short and a long spelling.
    for short_flag, long_flag in (
        ("-d", "--database"),
        ("-r", "--resource"),
        ("-o", "--output"),
    ):
        cli.add_argument(short_flag, long_flag)
    return cli.parse_args()
12
13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
16 data_manager_dict['data_tables'][data_table].append(data_table_entry)
17 return data_manager_dict
18
def _parse_frogs_index(tsv_text):
    """Turn the FROGS databases index (TSV text) into six-column rows.

    The first line is treated as a header and dropped; blank lines are
    skipped. Each returned row is the first four columns of the index line,
    followed by the computed data table value and then the fifth column of
    the index line (used by the caller as the download link).
    """
    rows = []
    for raw_line in tsv_text.splitlines()[1:]:
        if not raw_line.strip():
            continue  # e.g. a trailing empty line at the end of the index
        fields = raw_line.split("\t")
        if len(fields) < 5:
            raise ValueError(
                "Malformed FROGS index line (expected at least 5 columns): %r" % raw_line)
        # NOTE(review): the value repeats column 1 on both sides of column 2;
        # this is kept exactly as in the original code -- confirm it is intended.
        value = fields[1] + "_" + fields[2] + "_" + fields[1]
        rows.append(fields[:4] + [value, fields[4]])
    return rows


def _single_item(items, description):
    """Return the only element of *items*, or raise RuntimeError otherwise."""
    if len(items) != 1:
        raise RuntimeError(
            "Expected exactly one %s, found %d: %r" % (description, len(items), items))
    return items[0]


def frogs_sources(data_manager_dict, target_directory, max_databases=2):
    """Download FROGS databases and register them in the ``frogs_db`` table.

    The databases index is fetched over HTTP, each listed archive is
    downloaded and extracted under
    ``<target_directory>/frogs_db_<YYYYMMDD>/``, and for every database an
    entry ``{name, value, path}`` is added to *data_manager_dict* through
    ``_add_data_table_entry``; ``path`` points at the extracted ``.fasta``
    file.

    Args:
        data_manager_dict: dictionary accumulating the data manager output;
            modified in place.
        target_directory: directory under which the databases are extracted
            and which prefixes the recorded fasta paths.
        max_databases: maximum number of index rows to process. Defaults to 2,
            the limit that was previously hard-coded; pass ``None`` to
            process every database listed in the index.

    Raises:
        requests.HTTPError: if the index cannot be downloaded.
        ValueError: if an index line has fewer than five columns.
        RuntimeError: if an archive does not add exactly one new directory,
            or that directory does not contain exactly one ``.fasta`` file.
    """
    # Imported locally so that this function does not rely on file-level imports
    # that are missing (tarfile) or Python 2 only (urllib.URLopener).
    import tarfile
    try:
        from urllib.request import urlretrieve  # Python 3
    except ImportError:
        from urllib import urlretrieve  # Python 2

    # get frogs database index
    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as s:
        download = s.get(frogs_db_index_link)
        download.raise_for_status()
        # Decode once and work on text, so splitting on "\t" also works on Python 3.
        db_index = _parse_frogs_index(download.content.decode('utf-8'))

    if max_databases is not None:
        db_index = db_index[:max_databases]

    # get frogs dbs
    # Extract below target_directory so that the recorded paths really exist,
    # independently of the current working directory.
    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
    db_root = os.path.join(target_directory, dir_name)
    if not os.path.isdir(db_root):
        os.makedirs(db_root)
    archive_path = os.path.join(target_directory, "tmp.tar.gz")
    known_dbs = set(os.listdir(db_root))

    for line in db_index:
        value = line[4]
        name = value.replace("_", " ")
        link = line[5]

        # download and unpack the frogs db; the archive is always cleaned up
        try:
            urlretrieve(link, archive_path)
            with tarfile.open(archive_path) as tar:
                # NOTE(security): extractall() trusts the member paths stored in
                # the archive; only use it with archives from a trusted server.
                tar.extractall(db_root)
        finally:
            if os.path.exists(archive_path):
                os.remove(archive_path)

        # get fasta file path: the database is whatever directory just appeared
        current_dbs = set(os.listdir(db_root))
        new_db = _single_item(
            sorted(current_dbs - known_dbs),
            "new database directory for %s" % value)
        known_dbs = current_dbs
        fasta = _single_item(
            sorted(entry for entry in os.listdir(os.path.join(db_root, new_db))
                   if entry.endswith('.fasta')),
            "fasta file in %s" % new_db)
        path = os.path.join(target_directory, dir_name, new_db, fasta)

        data_table_entry = dict(name=name, value=value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
62
# TODO: HVL_sources(resource) is not implemented yet.

# TODO: phiX_sources(resource) is not implemented yet.
66
def main():
    """Entry point of the data manager script.

    Reads the JSON parameters from the file given with ``-o/--output``,
    makes sure the ``extra_files_path`` directory of the first output
    dataset exists, runs the handler selected with ``-d/--database`` and
    finally overwrites the same file with the collected data table entries
    as JSON.

    A ``--database`` value other than the three known ones runs no handler;
    an empty JSON object is then written.

    Raises:
        NotImplementedError: for ``HVL_db_data`` and ``phiX_db_data``, whose
            handlers do not exist yet.
    """
    # get args from command line
    args = get_args()

    # Extract json file params
    filename = args.output
    with open(filename) as params_file:
        params = json.loads(params_file.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    data_manager_dict = {}
    if args.database == "frogs_db_data":
        frogs_sources(data_manager_dict, target_directory)
    elif args.database in ("HVL_db_data", "phiX_db_data"):
        # The matching handlers are only commented-out stubs in this file;
        # fail with an explicit message instead of a NameError.
        raise NotImplementedError(
            "Database type '%s' is not supported yet" % args.database)

    # Hand the collected entries back by overwriting the parameters file;
    # without this step nothing gathered above ever leaves the script.
    with open(filename, 'w') as output_file:
        output_file.write(json.dumps(data_manager_dict))


if __name__ == "__main__":
    main()