annotate data_manager/FROGS_data_manager.py @ 11:0cc5f020640e draft default tip

planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
author dchristiany
date Mon, 28 Oct 2019 06:53:13 -0400
parents 238a5328279d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
1 # -*- coding: utf-8 -*-
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
2 from galaxy.util.json import from_json_string, to_json_string
5
ac4fb9d97a51 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents: 4
diff changeset
3 import os, sys, argparse, time, json, requests, urllib, tarfile
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
4
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
def get_args(argv=None):
    """Parse the command-line options of the data manager.

    argv -- optional list of argument strings to parse. When left to None
            (the default) argparse reads the options from sys.argv[1:],
            which is what the script entry point relies on.

    Returns the argparse.Namespace holding database, all_dbs, date,
    amplicons, bases, filters, only_last_versions, tool_data and output.
    Every option is an optional string; an option that is not given is None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--database")
    # Long-only options, declared in the order they appear in the help text.
    for option in ("--all_dbs", "--date", "--amplicons", "--bases",
                   "--filters", "--only_last_versions", "--tool_data"):
        parser.add_argument(option)
    parser.add_argument("-o", "--output")
    return parser.parse_args(argv)
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
18
10
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
19 #build database last version dictionary: key=base_id, value=last version
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
def build_last_version_dict(db_index):
    """Return a dict giving, for each database id, its highest date.

    db_index -- iterable of index rows; field 0 is converted with int() and
                used as the date, field 5 is used as the dictionary key.
    """
    newest = {}
    for row in db_index:
        stamp = int(row[0])
        key = row[5]
        # First sighting of the key, or a strictly more recent date.
        if key not in newest or stamp > newest[key]:
            newest[key] = stamp
    return newest
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
30
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
31 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
32 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
33 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
34 data_manager_dict['data_tables'][data_table].append(data_table_entry)
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
35 return data_manager_dict
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
36
10
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
def keep_only_last_version(db_index):
    """Drop the older versions of databases listed several times.

    A row's base name is its field 5 with the last "_"-separated part
    removed. Rows whose base name is unique are kept as they are, in their
    original order. For a base name shared by several rows, only the row
    whose field 5 equals "<base name>_<highest field 4>" is kept; these rows
    are appended after the unique ones, in order of first appearance of
    their base name. If no row matches that identifier nothing is kept for
    that base name.

    db_index -- list of index rows (lists of strings).
    Returns a new list; db_index itself is not modified.
    """
    import re  # local import: the module header does not import re

    def _base_name(value):
        # "name_version" -> "name"
        return "_".join(value.split("_")[:-1])

    def _version_key(version):
        # Compare digit runs as integers so that "10" ranks above "9".
        # re.split with a capturing group alternates text/digits, so odd
        # positions are always digit runs and keys stay type-compatible.
        parts = re.split(r"(\d+)", version)
        return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]

    # Single pass: group the versions by base name, remembering first-seen order.
    versions_by_base = {}
    base_order = []
    for line in db_index:
        base = _base_name(line[5])
        if base not in versions_by_base:
            versions_by_base[base] = []
            base_order.append(base)
        versions_by_base[base].append(line[4])

    out = [line for line in db_index
           if len(versions_by_base[_base_name(line[5])]) == 1]

    for base in base_order:
        versions = versions_by_base[base]
        if len(versions) < 2:
            continue
        to_keep = base + "_" + max(versions, key=_version_key)
        for line in db_index:
            if line[5] == to_keep:
                out.append(line)
                break
    return out
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
50
238a5328279d planemo upload commit b0ebe74a020dcb21b79d8d39e7b6a2f6533b2fc4-dirty
dchristiany
parents: 9
diff changeset
def frogs_sources(data_manager_dict, target_directory):
    """Download the selected FROGS assignation databases and register them.

    The database index (a tab-separated file whose first line is a header)
    is fetched, optionally filtered according to the module-level ``args``
    namespace, and every remaining database whose directory is not already
    present under "<tool_data>/frogs_db/" is downloaded and unpacked into
    "<target_directory>/frogs_db_<YYYYMMDD>/". One entry per downloaded
    database is added to the "frogs_db" data table.

    data_manager_dict -- dict receiving the data table entries (modified in place).
    target_directory  -- existing directory the archives are unpacked into;
                         it becomes the current working directory.
    """
    # Selection criteria are only read when a subset of databases was requested.
    select_subset = args.all_dbs == "false"
    amplicons_list = []
    bases_list = []
    filters_list = []
    bottom_date = 0
    if select_subset:
        amplicons_list = [amplicon.lower().strip() for amplicon in args.amplicons.split(",") if amplicon != ""]
        bases_list = [base.lower().strip() for base in args.bases.split(",") if base != ""]
        filters_list = [entry.lower().strip() for entry in args.filters.split(",") if entry != ""]
        bottom_date = int(args.date)
    tool_data_path = args.tool_data

    # get frogs database index
    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
    with requests.Session() as s:
        download = s.get(frogs_db_index_link)
        # Fail here on an HTTP error instead of trying to parse an error page.
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
    # Split the decoded text, not the raw bytes: bytes cannot be split on a
    # str separator under Python 3.
    db_index = [line.split("\t") for line in decoded_content.splitlines()[1:]]
    # Fields 1 to 3 are lower-cased so they compare equal to the lower-cased user selection.
    db_index = [[line[0], line[1].lower(), line[2].lower(), line[3].lower()] + line[4:] for line in db_index]

    # filter databases
    if select_subset:
        if amplicons_list:  # filter by amplicons (field 1 is a comma-separated list)
            db_index = [line for line in db_index
                        if any(amplicon in amplicons_list for amplicon in line[1].split(','))]
        if bases_list:  # filter by base
            db_index = [line for line in db_index if line[2] in bases_list]
        if filters_list:  # filter by filters
            db_index = [line for line in db_index if line[3] in filters_list]
        if bottom_date != 0:  # filter by date
            db_index = [line for line in db_index if int(line[0]) >= bottom_date]
    # NOTE(review): assumed to apply whether or not all_dbs is "false";
    # confirm the intended nesting of this check.
    if args.only_last_versions == "true":
        db_index = keep_only_last_version(db_index)  # keep only last version

    # get frogs dbs
    os.chdir(target_directory)
    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
    os.mkdir(dir_name)
    dbs = set()  # content of dir_name before the current extraction
    for line in db_index:
        value = line[5]
        name = value.replace("_", " ")
        link = line[6]
        name_dir = link.replace(".tar.gz", "").split("/")[-1]
        file_path = tool_data_path + "/frogs_db/" + name_dir
        if os.path.exists(file_path):
            # the database is already in the frogs_db directory
            continue

        # download frogs db
        # NOTE(review): urllib.URLopener is only available under this name on Python 2.
        dl_file = urllib.URLopener()
        dl_file.retrieve(link, "tmp.tar.gz")

        # unzip frogs db (the with statement closes the archive)
        # NOTE(review): members are extracted without path checks; the
        # archive is trusted to come from the index server.
        with tarfile.open("tmp.tar.gz") as tar:
            tar.extractall(dir_name)
        os.remove('tmp.tar.gz')

        # get fasta file path: the new database is whatever appeared in
        # dir_name since the previous extraction
        tmp = set(os.listdir(dir_name))
        new_db = dir_name + "/" + "".join(tmp.difference(dbs))
        files = os.listdir(new_db)
        fasta = "".join([entry for entry in files if entry.endswith('.fasta')])
        path = os.path.join(target_directory, new_db + '/' + fasta)
        dbs = tmp

        data_table_entry = dict(name=name, value=value, path=path)
        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
119
9
0d9cb5c5aa35 planemo upload commit af25bf8e6c89c9de2e25dc0bc0ead06d8c05cf17
dchristiany
parents: 7
diff changeset
def HVL_sources(data_manager_dict,target_directory):
    """Download the two UNITE 7.1 ITS fasta files and register them.

    Each file is saved in target_directory (which becomes the current
    working directory) under its remote name suffixed with today's date,
    and one entry per file is added to the "HVL_db" data table.

    data_manager_dict -- dict receiving the data table entries (modified in place).
    target_directory  -- existing directory the fasta files are written to.
    """
    os.chdir(target_directory)
    its_links = [
        "http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS1.fasta",
        "http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016/Unite_s_7.1_20112016_ITS2.fasta",
    ]
    for link in its_links:
        # local file name = remote file name with the download date inserted
        remote_name = link.split("/")[-1]
        file_name = remote_name.replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta")
        urllib.URLopener().retrieve(link,file_name)

        # absolute path of the downloaded fasta file
        path = os.path.join(target_directory,file_name)
        # display name depends on which ITS region the file covers
        if link.endswith('ITS1.fasta'):
            name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d")
        elif link.endswith('ITS2.fasta'):
            name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d")
        value = file_name.replace('.fasta','')

        _add_data_table_entry(data_manager_dict, dict(name = name, value = value, path=path), "HVL_db")
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
139
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
def main():
    """Entry point of the data manager.

    Parses the command line into the module-level ``args``, reads the JSON
    parameters from the file given with -o/--output, creates the directory
    named by params['output_data'][0]['extra_files_path'], runs the
    downloader matching --database ("frogs_db_data" or "HVL_db_data"; any
    other value downloads nothing) and finally overwrites the output file
    with the resulting data table description.
    """
    # get args from command line (global: frogs_sources reads them directly)
    global args
    args = get_args()

    # Extract json file params
    data_manager_dict = {}
    filename = args.output
    with open(filename) as handle:  # closed as soon as the content is read
        params = from_json_string(handle.read())
    target_directory = params[ 'output_data' ][0]['extra_files_path']
    os.mkdir(target_directory)

    if args.database=="frogs_db_data":
        frogs_sources(data_manager_dict,target_directory)
    elif args.database=="HVL_db_data":
        HVL_sources(data_manager_dict,target_directory)

    # save info to json file; the with block guarantees the data is flushed and the file closed
    with open(filename, 'wb') as handle:
        handle.write(to_json_string(data_manager_dict))
d11bc4a8f596 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents: 5
diff changeset
160
0
7caea40b2a30 planemo upload commit c3ff1475af0e964a0c61458b66e2744c903d8d3d-dirty
dchristiany
parents:
diff changeset
# Run the data manager only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()