annotate data_manager/FROGS_data_manager.py @ 0:7caea40b2a30 draft

planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
author dchristiany
date Wed, 02 Oct 2019 11:00:08 -0400
parents
children 95d5191f1dc4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
1 # -*- coding: utf-8 -*-
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
2 from galaxy.util.json import from_json_string, to_json_string
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
3 import os, sys, argparse, time, json, requests, urllib
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
4
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
def get_args(argv=None):
    """Parse the data manager's command-line options.

    Args:
        argv: Optional list of argument strings to parse. When None (the
            default) argparse reads sys.argv[1:], which is the behaviour
            callers that pass no argument already rely on.

    Returns:
        argparse.Namespace with ``database``, ``resource`` and ``output``
        attributes. Each attribute is None when its option is not supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--database")
    parser.add_argument("-r", "--resource")
    parser.add_argument("-o", "--output")
    return parser.parse_args(argv)
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
12
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
13 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
14 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
15 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
16 data_manager_dict['data_tables'][data_table].append(data_table_entry)
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
17 return data_manager_dict
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
18
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
def frogs_sources(data_manager_dict, target_directory):
    """Download FROGS assignation databases and register them in ``frogs_db``.

    Fetches the FROGS database index (a tab-separated file whose first line
    is skipped as a header), then for the first two index rows downloads the
    referenced tarball, unpacks it under
    ``<target_directory>/frogs_db_<YYYYMMDD>/`` and appends a
    ``{name, value, path}`` entry to the ``frogs_db`` data table, where
    ``path`` points at the unpacked ``.fasta`` file.

    Args:
        data_manager_dict: Dict updated in place through
            ``_add_data_table_entry``.
        target_directory: Directory that receives the unpacked databases.
            The dated sub-directory is created inside it when missing.

    Raises:
        requests.HTTPError: If the index cannot be fetched successfully.
        RuntimeError: If an archive does not add exactly one new top-level
            entry, or that entry does not hold exactly one ``.fasta`` file.
    """
    # Local imports: tarfile is not imported at module level, and
    # urlretrieve lives in different modules on Python 2 and Python 3.
    import tarfile
    try:
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import urlretrieve

    # get frogs database index
    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as s:
        download = s.get(frogs_db_index_link)
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')

    # Split the decoded text (not the raw bytes) so "\t" is a valid separator
    # on Python 3 as well; blank and truncated rows are ignored.
    rows = [line.split("\t") for line in decoded_content.splitlines()[1:] if line.strip()]
    # Insert a synthetic "value" column at index 4; the download link moves
    # to index 5.
    # NOTE(review): the value is built from columns 1, 2 and 1 again -- the
    # repeated column 1 may be unintended; confirm against the index layout.
    db_index = [
        row[:4] + [row[1] + "_" + row[2] + "_" + row[1]] + [row[4]]
        for row in rows
        if len(row) >= 5
    ]

    # get frogs dbs
    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
    # Unpack below target_directory so the registered path really exists
    # (previously the files landed in the current working directory).
    db_root = os.path.join(target_directory, dir_name)
    if not os.path.isdir(db_root):
        os.makedirs(db_root)
    archive = os.path.join(target_directory, "tmp.tar.gz")
    known = set(os.listdir(db_root))

    # NOTE(review): only the first two index rows are processed; this looks
    # like a development limit -- confirm before lifting it.
    for row in db_index[:2]:
        value = row[4]
        name = value.replace("_", " ")
        link = row[5]

        try:
            # download frogs db
            urlretrieve(link, archive)
            # unzip frogs db
            # NOTE: extractall() trusts the member paths stored in the
            # archive; only use it with archives from a trusted source.
            with tarfile.open(archive) as tar:
                tar.extractall(db_root)
        finally:
            # Never leave the temporary archive behind, even on failure.
            if os.path.exists(archive):
                os.remove(archive)

        # get fasta file path: the database is whatever this archive added.
        current = set(os.listdir(db_root))
        added = sorted(current - known)
        known = current
        if len(added) != 1:
            raise RuntimeError(
                "Expected archive {0} to add exactly one entry to {1}, found: {2}".format(
                    link, db_root, added))
        new_db = os.path.join(db_root, added[0])

        fasta_files = sorted(
            entry for entry in os.listdir(new_db) if entry.endswith('.fasta'))
        if len(fasta_files) != 1:
            raise RuntimeError(
                "Expected exactly one .fasta file in {0}, found: {1}".format(
                    new_db, fasta_files))
        path = os.path.join(new_db, fasta_files[0])

        data_table_entry = dict(name=name, value=value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
62
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
63 #def HVL_sources(resource):
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
64
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
65 #def phiX_sources(resource):
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
66
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
def main():
    """Run the data manager for the database selected on the command line.

    Reads the JSON parameters from the file given by ``--output``, makes sure
    the ``extra_files_path`` directory of its first ``output_data`` item
    exists, dispatches on ``--database`` and finally overwrites the
    ``--output`` file with the collected data table entries as JSON.

    Raises:
        NotImplementedError: If ``--database`` selects ``HVL_db_data`` or
            ``phiX_db_data`` while the matching ``*_sources`` function is
            not defined in this module.
    """
    # get args from command line
    args = get_args()

    # Extract json file params
    data_manager_dict = {}
    filename = args.output
    with open(filename) as handle:
        params = json.load(handle)
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    if args.database == "frogs_db_data":
        frogs_sources(data_manager_dict, target_directory)
    else:
        # HVL_sources / phiX_sources only exist as commented-out stubs, so
        # resolve them lazily and fail with an explicit message instead of
        # a NameError while they are missing.
        handler_name = {
            "HVL_db_data": "HVL_sources",
            "phiX_db_data": "phiX_sources",
        }.get(args.database)
        if handler_name is not None:
            handler = globals().get(handler_name)
            if handler is None:
                raise NotImplementedError(
                    "{0} is not implemented (database {1})".format(
                        handler_name, args.database))
            handler(args.resource)

    # Save the collected entries; without this step nothing built above is
    # ever reported back through the output file.
    with open(filename, 'w') as handle:
        json.dump(data_manager_dict, handle)
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
85
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
# Script entry point: run the data manager only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()