Mercurial > repos > dchristiany > frogs_data_manager
annotate data_manager/FROGS_data_manager.py @ 11:0cc5f020640e draft default tip
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
author | dchristiany |
---|---|
date | Mon, 28 Oct 2019 06:53:13 -0400 |
parents | 238a5328279d |
children |
rev | line source |
---|---|
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
1 # -*- coding: utf-8 -*- |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
2 from galaxy.util.json import from_json_string, to_json_string |
5
ac4fb9d97a51
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
4
diff
changeset
|
3 import os, sys, argparse, time, json, requests, urllib, tarfile |
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
4 |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
def get_args(argv=None):
    """Parse the data manager's command-line options.

    Args:
        argv: Optional list of argument strings to parse. The default
            ``None`` makes argparse read ``sys.argv[1:]``, which is what the
            script does when run from the command line.

    Returns:
        An ``argparse.Namespace`` with the attributes ``database``,
        ``all_dbs``, ``date``, ``amplicons``, ``bases``, ``filters``,
        ``only_last_versions``, ``tool_data`` and ``output``. Every option
        is an optional string and is ``None`` when it was not supplied.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--database")
    # Long-only options, declared in the order they appear in --help.
    for option in ("--all_dbs", "--date", "--amplicons", "--bases",
                   "--filters", "--only_last_versions", "--tool_data"):
        parser.add_argument(option)
    parser.add_argument("-o", "--output")
    return parser.parse_args(argv)
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
18 |
10
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
19 #build database last version dictionary: key=base_id, value=last version |
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
def build_last_version_dict(db_index):
    """Return ``{base_id: most recent date}`` for the rows of ``db_index``.

    Each row is indexed positionally: column 0 is converted with ``int`` and
    treated as the date, column 5 is used as the database id. When an id
    occurs on several rows, the largest date wins.
    """
    newest = {}
    for row in db_index:
        stamp = int(row[0])
        key = row[5]
        # First sighting stores the date; later ones only ever raise it.
        newest[key] = max(stamp, newest.get(key, stamp))
    return newest
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
30 |
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
31 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table): |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
32 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
33 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, []) |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
34 data_manager_dict['data_tables'][data_table].append(data_table_entry) |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
35 return data_manager_dict |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
36 |
10
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
def keep_only_last_version(db_index):
    """Keep a single row, the highest version, per database family.

    Rows are indexed positionally: column 4 holds the version string and
    column 5 the database id. The family of a row is its id with the last
    ``_``-separated component removed.

    Versions are compared naturally (digit runs as integers), so ``"10"``
    ranks above ``"9"``; a plain string sort would pick ``"9"``.

    Args:
        db_index: list of index rows (lists of strings).

    Returns:
        A new list: first the rows whose family occurs only once, in input
        order, then the winning row of every multi-version family, in order
        of first appearance. The input list is not modified.
    """
    import re  # local import: the file's import line does not provide ``re``

    def family(row):
        # Database id without its trailing "_<version>" component.
        return "_".join(row[5].split("_")[:-1])

    def version_key(row):
        # re.split with a capturing group alternates text / digit runs, so
        # odd positions are always digits; keys therefore compare
        # element-wise without ever mixing str and int.
        parts = re.split(r"([0-9]+)", row[4])
        return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]

    # Single pass grouping instead of calling list.count() for every row.
    groups = {}
    first_seen = []
    for row in db_index:
        name = family(row)
        if name not in groups:
            groups[name] = []
            first_seen.append(name)
        groups[name].append(row)

    out = [row for row in db_index if len(groups[family(row)]) == 1]
    for name in first_seen:
        candidates = groups[name]
        if len(candidates) > 1:
            # max() returns the first row among equal versions.
            out.append(max(candidates, key=version_key))
    return out
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
50 |
238a5328279d
planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents:
9
diff
changeset
|
def frogs_sources(data_manager_dict,target_directory):
    """Download the selected FROGS assignation databanks and register them.

    Options are read from the module-level ``args`` namespace set by
    ``main``: ``all_dbs``, ``amplicons``, ``bases``, ``filters``, ``date``,
    ``only_last_versions`` and ``tool_data``.

    The remote index is a tab-separated file whose first line is skipped.
    Columns are used positionally: 0 date, 1 comma-separated amplicons,
    2 base, 3 filter, 4 version, 5 database id, 6 archive link.

    Args:
        data_manager_dict: dictionary that receives one ``frogs_db`` entry
            (``name``, ``value``, ``path``) per downloaded databank.
        target_directory: directory to download and extract into; the
            process working directory is changed to it.

    Raises:
        requests.HTTPError: if the index cannot be fetched.
        RuntimeError: if an extracted archive does not yield exactly one
            identifiable directory or ``.fasta`` file.
    """
    # Python 3 moved the retrieval helper to urllib.request; the Python 2
    # fallback keeps the opener this script originally used.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        urlretrieve = urllib.URLopener().retrieve

    #variables
    amplicons_list=[]
    bases_list=[]
    filters_list=[]
    bottom_date=0
    filter_dbs = args.all_dbs=="false"
    if filter_dbs:
        amplicons_list = [amplicon.lower().strip() for amplicon in args.amplicons.split(",") if amplicon != ""]
        bases_list = [base.lower().strip() for base in args.bases.split(",") if base != ""]
        filters_list = [item.lower().strip() for item in args.filters.split(",") if item != ""]
        bottom_date = int(args.date)
    tool_data_path=args.tool_data

    #get frogs database index
    frogs_db_index_link="http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as s:
        download = s.get(frogs_db_index_link)
        # Fail here rather than try to parse an HTTP error page as the index.
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
    # Split the decoded text (not the raw bytes) and ignore blank lines.
    rows = [line.split("\t") for line in decoded_content.splitlines()[1:] if line.strip()]
    db_index = [[row[0],row[1].lower(),row[2].lower(),row[3].lower()]+row[4:] for row in rows]

    #filter databases
    if filter_dbs:
        if amplicons_list:      #filter by amplicons
            db_index = [line for line in db_index if any(amplicon in amplicons_list for amplicon in line[1].split(','))]
        if bases_list:          #filter by base
            db_index = [line for line in db_index if line[2] in bases_list]
        if filters_list:        #filter by filters
            db_index = [line for line in db_index if line[3] in filters_list]
        if bottom_date!=0:      #filter by date
            db_index = [line for line in db_index if int(line[0])>=bottom_date]
    # NOTE(review): the listing this was recovered from lost its indentation;
    # this check is assumed to sit outside the all_dbs block - confirm.
    if args.only_last_versions=="true":
        db_index = keep_only_last_version(db_index)     #keep only last version

    #get frogs dbs
    os.chdir(target_directory)
    dir_name="frogs_db_"+time.strftime("%Y%m%d")
    if not os.path.isdir(dir_name):
        os.mkdir(dir_name)
    # Entries already present, so each extraction can be told apart from them.
    dbs = set(os.listdir(dir_name))
    for line in db_index:
        value=line[5]
        name=value.replace("_"," ")
        link=line[6]
        name_dir=link.replace(".tar.gz","").split("/")[-1]
        file_path=os.path.join(tool_data_path,"frogs_db",name_dir)
        if os.path.exists(file_path):   #already in the frogs_db directory
            continue

        #download frogs db
        archive = "tmp.tar.gz"
        urlretrieve(link, archive)

        #unzip frogs db
        try:
            # NOTE(review): extractall trusts the member paths stored in the
            # archive; only use with archives from a trusted server.
            with tarfile.open(archive) as tar:
                tar.extractall(dir_name)
        finally:
            # Remove the temporary archive even when extraction fails.
            os.remove(archive)

        #get fasta file path
        extracted = set(os.listdir(dir_name))
        new_entries = sorted(extracted.difference(dbs))
        dbs = extracted
        if len(new_entries) == 1:
            new_db = os.path.join(dir_name, new_entries[0])
        elif os.path.isdir(os.path.join(dir_name, name_dir)):
            # Nothing new (or several entries): fall back to the archive name.
            new_db = os.path.join(dir_name, name_dir)
        else:
            raise RuntimeError("Cannot locate the directory extracted from " + link)
        fastas = [entry for entry in os.listdir(new_db) if entry.endswith('.fasta')]
        if len(fastas) != 1:
            raise RuntimeError("Expected exactly one .fasta file in " + new_db + ", found " + str(len(fastas)))
        path = os.path.join(target_directory, new_db, fastas[0])

        data_table_entry = dict(name = name, value = value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
119 |
9
0d9cb5c5aa35
planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
dchristiany
parents:
7
diff
changeset
|
def HVL_sources(data_manager_dict,target_directory):
    """Download the UNITE 7.1 ITS1 and ITS2 FASTA files and register them.

    Each file is saved in ``target_directory`` under its remote name with
    today's date inserted before ``.fasta``, then added to the ``HVL_db``
    data table with a ``name``, a ``value`` (the file name without
    ``.fasta``) and its ``path``.

    Args:
        data_manager_dict: dictionary that receives the ``HVL_db`` entries.
        target_directory: directory to download into; the process working
            directory is changed to it.
    """
    # Python 3 moved the retrieval helper to urllib.request; the Python 2
    # fallback keeps the opener this script originally used.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        urlretrieve = urllib.URLopener().retrieve

    base_url = "http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/"
    # Computed once so file name, display name and value always agree, even
    # if the run crosses midnight.
    today = time.strftime("%Y-%m-%d")

    #get UNITE ITS files
    os.chdir(target_directory)
    for region in ("ITS1", "ITS2"):
        link = base_url + "Unite_s_7.1_20112016_" + region + ".fasta"
        file_name = link.split("/")[-1].replace('.fasta', "_" + today + ".fasta")
        urlretrieve(link, file_name)

        #get fasta file path
        path = os.path.join(target_directory, file_name)
        name = "UNITE 7.1 " + region + " " + today
        value = file_name.replace('.fasta', '')

        data_table_entry = dict(name = name, value = value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "HVL_db")
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
139 |
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
def main():
    """Run the data manager: read parameters, fetch data, write the result.

    The file given by ``--output`` is first read as JSON to obtain
    ``output_data[0].extra_files_path`` (the download directory), and is
    finally overwritten with the JSON description of the new data table
    entries. ``--database`` selects ``frogs_db_data`` or ``HVL_db_data``;
    any other value results in an empty dictionary being written.
    """
    #get args from command line
    global args     # read by frogs_sources
    args = get_args()

    # Extract json file params
    data_manager_dict = {}
    # Resolve now: the source functions chdir into the target directory, so
    # a relative path would otherwise point somewhere else when writing.
    filename = os.path.abspath(args.output)
    with open(filename) as handle:
        params = from_json_string(handle.read())
    target_directory = params[ 'output_data' ][0]['extra_files_path']
    # Tolerate an existing directory and create missing parents.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    if args.database=="frogs_db_data":
        frogs_sources(data_manager_dict,target_directory)
    elif args.database=="HVL_db_data":
        HVL_sources(data_manager_dict,target_directory)

    #save info to json file
    # Text mode: the serialised JSON is a str, which 'wb' rejects on Python 3.
    with open(filename, 'w') as handle:
        handle.write(to_json_string(data_manager_dict))
d11bc4a8f596
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
5
diff
changeset
|
160 |
0
7caea40b2a30
planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff
changeset
|
# Script entry point: only run the data manager when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()