annotate data_manager/resource_building.py @ 46:80fc0b28e227 draft

planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
author dchristiany
date Fri, 01 Feb 2019 10:21:58 -0500
parents 3febf3d1139a
children 7b486b0fba4e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
19
85532a48e4e4 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 17
diff changeset
1 # -*- coding: utf-8 -*-
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
2 """
10
2f153b41b6fe planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
dchristiany
parents: 7
diff changeset
3 The purpose of this script is to create source files from different databases to be used in other proteore tools
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
4 """
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
5
21
0a79066992fc planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 20
diff changeset
6 import os, sys, argparse, requests, time, csv, re, json, shutil, zipfile
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
7 from io import BytesIO
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
8 from zipfile import ZipFile
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
9 from galaxy.util.json import from_json_string, to_json_string
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
10
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
11 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
12 # General functions
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
13 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def unzip(url, output_file):
    """
    Download a zip archive from `url` and write its first member to `output_file`.

    Only the first entry listed in the archive is extracted; any other
    member is ignored. The content is copied as raw bytes, so no decoding
    or newline translation takes place.

    url -- address of the zip archive
    output_file -- path of the file to create (overwritten if it exists)

    Raises requests.HTTPError if the server answers with an error status,
    and IndexError if the archive contains no member.
    """
    response = requests.get(url)
    # Fail on HTTP errors instead of trying to read an error page as a zip archive
    response.raise_for_status()

    # The archive is kept in memory; only the extracted member is written to disk
    with ZipFile(BytesIO(response.content)) as archive:
        member_bytes = archive.read(archive.namelist()[0])

    # Binary mode: the member is bytes, which cannot be concatenated to a str
    # or written to a text-mode file under Python 3
    with open(output_file, "wb") as output:
        output.write(member_bytes)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
25
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
26 def _add_data_table_entry(data_manager_dict, data_table_entry,data_table):
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
27 data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
28 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get(data_table, [])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
29 data_manager_dict['data_tables'][data_table].append(data_table_entry)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
30 return data_manager_dict
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
31
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
32 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
33 # 1. Human Protein Atlas
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
34 # - Normal tissue
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
35 # - Pathology
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
36 # - Full Atlas
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
37 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def HPA_sources(data_manager_dict, tissue, target_directory):
    """
    Download one Human Protein Atlas table and register it in the
    "proteore_protein_atlas" data table.

    data_manager_dict -- dictionary updated in place through _add_data_table_entry
    tissue -- "HPA_normal_tissue", "HPA_pathology" or "HPA_full_atlas"
    target_directory -- directory in which the .tsv file is written

    Raises ValueError when `tissue` is not one of the supported values
    (nothing is downloaded and `data_manager_dict` is left untouched).
    """
    # value of `tissue` -> (displayed name, archive to download)
    sources = {
        "HPA_normal_tissue": ("HPA normal tissue", "https://www.proteinatlas.org/download/normal_tissue.tsv.zip"),
        "HPA_pathology": ("HPA pathology", "https://www.proteinatlas.org/download/pathology.tsv.zip"),
        "HPA_full_atlas": ("HPA full atlas", "https://www.proteinatlas.org/download/proteinatlas.tsv.zip"),
    }
    if tissue not in sources:
        raise ValueError("Unknown Human Protein Atlas source '%s' (expected one of: %s)"
                         % (tissue, ", ".join(sorted(sources))))
    tissue_name, url = sources[tissue]

    # Read the clock once so the file name and the table entry always carry the same date
    now = time.localtime()

    output_file = tissue + "_" + time.strftime("%d-%m-%Y", now) + ".tsv"
    path = os.path.join(target_directory, output_file)
    unzip(url, path)  # download and save file

    tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y", now)
    # id is the displayed name made safe for use as an identifier
    tissue_id = tissue_name.replace(" ", "_").replace("/", "-")

    data_table_entry = dict(id=tissue_id, name=tissue_name, value=tissue, path=path)
    _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_protein_atlas")
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
57
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
58
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
59 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
60 # 2. Peptide Atlas
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
61 #######################################################################################################
43
3febf3d1139a planemo upload commit 9b701c1faf8be4835b4e7236780ee9ee26f9a373-dirty
dchristiany
parents: 42
diff changeset
def peptide_atlas_sources(data_manager_dict, tissue, date, target_directory):
    """
    Query PeptideAtlas for one build, write the number of observations per
    UniProt accession to a .tsv file and register that file in the
    "proteore_peptide_atlas" data table.

    data_manager_dict -- dictionary updated in place through _add_data_table_entry
    tissue -- string of the form "<id>.<tissue_name>"; <id> is sent to
              PeptideAtlas as atlas_build_id, <tissue_name> is used for the
              file name and the data table entry
    date -- string appended to the tissue name in the file name and in the id
    target_directory -- directory in which the .tsv file is written

    Raises ValueError when `tissue` contains no "." separator, and
    requests.HTTPError when PeptideAtlas answers with an error status.
    """
    # Define organism_id (here Human) - to be upgraded when other organisms are added to the project
    organism_id = "2"

    # Extract sample_category_id and tissue name without rebinding the `tissue` argument
    sample_category_id, separator, remainder = tissue.partition(".")
    if not separator:
        raise ValueError("tissue must look like '<id>.<tissue_name>', got '%s'" % tissue)
    tissue_name = remainder.split(".")[0]  # anything after a second "." is ignored
    output_file = tissue_name + "_" + date + ".tsv"

    # Built from separate literals: a backslash continuation inside a string
    # literal would embed the indentation of the following line in the URL
    query = ("https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="
             + sample_category_id
             + "&display_options=ShowAbundances&organism_id=" + organism_id
             + "&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf"
             + "&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY")

    with requests.Session() as s:
        download = s.get(query)
        # Do not build (and register) a table from an HTTP error page
        download.raise_for_status()
        decoded_content = download.content.decode('utf-8')
        cr = csv.reader(decoded_content.splitlines(), delimiter='\t')

    uni_dict = build_dictionary(cr)

    # columns of data table peptide_atlas
    tissue_id = tissue_name + "_" + date
    name = tissue_id.replace("-", "/").replace("_", " ")
    path = os.path.join(target_directory, output_file)

    with open(path, "w") as out:
        w = csv.writer(out, delimiter='\t')
        w.writerow(["Uniprot_AC", "nb_obs"])
        w.writerows(uni_dict.items())

    data_table_entry = dict(id=tissue_id, name=name, value=path, tissue=tissue_name)
    _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_peptide_atlas")
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
95
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
96 #function to count the number of observations by uniprot id
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def build_dictionary (csv) :
    """
    Sum the number of observations per UniProt accession.

    csv -- iterable of rows (lists of strings), e.g. a csv.reader; column 0
           is read as the identifier and column 5 as an integer count.
           Note: inside this function the parameter name hides the csv module.

    A row is counted only when its identifier contains no "-" and is
    accepted by check_uniprot_access. Empty rows, which csv.reader yields
    for blank lines, are skipped.

    Returns a dictionary {identifier: summed count}.
    """
    uni_dict = {}
    for line in csv :
        if not line :
            # blank input line: there is no column 0 to look at
            continue
        accession = line[0]
        if "-" not in accession and check_uniprot_access(accession) :
            uni_dict[accession] = uni_dict.get(accession, 0) + int(line[5])

    return uni_dict
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
107
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
108 #function to check whether an id is a UniProt accession number : returns True or False
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def check_uniprot_access (id) :
    """
    Tell whether `id` begins with a UniProt accession number.

    The pattern is applied with re.match, i.e. anchored at the start of the
    string only: characters following a valid accession are not rejected.

    Returns True or False.
    """
    accession_shape = "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}"
    return re.match(accession_shape, id) is not None
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
115
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
def check_entrez_geneid (id) :
    """
    Tell whether `id` begins with one of the identifier shapes accepted here:
      - digits only
      - 1 or 2 uppercase letters, "_", digits
      - 1 or 2 uppercase letters, "_", 1 to 4 uppercase letters, digits

    The pattern is applied with re.match, i.e. anchored at the start of the
    string only: characters following a valid prefix are not rejected.

    Returns True or False.
    """
    # re.compile (the original call was misspelled and raised AttributeError)
    entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+")
    return entrez_pattern.match(id) is not None
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
122
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
123 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
124 # 3. ID mapping file
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
125 #######################################################################################################
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
126 import ftplib, gzip
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
# Raise the csv field size limit as far as the platform allows, to handle big files.
# sys.maxsize does not fit in a C long on every platform; csv.field_size_limit
# then raises OverflowError, so fall back to the largest 32-bit signed value.
try:
    csv.field_size_limit(sys.maxsize)
except OverflowError:
    csv.field_size_limit(2 ** 31 - 1)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
128
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
129 def id_mapping_sources (data_manager_dict, species, target_directory) :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
130
28
d235909789ca planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 27
diff changeset
131 human = species == "Human"
d235909789ca planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 27
diff changeset
132 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" }
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
133 files=["idmapping_selected.tab.gz","idmapping.dat.gz"]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
134
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
135 #header
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
136 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]]
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
137 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]]
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
138
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
139 #print("header ok")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
140
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
141 #get selected.tab and keep only ids of interest
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
142 selected_tab_file=species_dict[species]+"_"+files[0]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
143 tab_path = download_from_uniprot_ftp(selected_tab_file,target_directory)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
144 with gzip.open(tab_path,"rt") as select :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
145 tab_reader = csv.reader(select,delimiter="\t")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
146 for line in tab_reader :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
147 tab.append([line[i] for i in [0,1,2,3,4,5,6,11,13,14,18,19,20]])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
148 os.remove(tab_path)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
149
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
150 #print("selected_tab ok")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
151
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
152 """
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
153 Supplementary ID to get from HUMAN_9606_idmapping.dat :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
154 -NextProt,BioGrid,STRING,KEGG
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
155 """
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
156
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
157 #there's more id type for human
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
158 if human : ids = ['neXtProt','BioGrid','STRING','KEGG' ] #ids to get from dat_file
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
159 else : ids = ['BioGrid','STRING','KEGG' ]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
160 unidict = {}
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
161
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
162 #keep only ids of interest in dictionaries
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
163 dat_file=species_dict[species]+"_"+files[1]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
164 dat_path = download_from_uniprot_ftp(dat_file,target_directory)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
165 with gzip.open(dat_path,"rt") as dat :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
166 dat_reader = csv.reader(dat,delimiter="\t")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
167 for line in dat_reader :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
168 uniprotID=line[0] #UniProtID as key
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
169 id_type=line[1] #ID type of corresponding id, key of sub-dictionnary
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
170 cor_id=line[2] #corresponding id
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
171 if "-" not in id_type : #we don't keep isoform
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
172 if id_type in ids and uniprotID in unidict :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
173 if id_type in unidict[uniprotID] :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
174 unidict[uniprotID][id_type]= ";".join([unidict[uniprotID][id_type],cor_id]) #if there is already a value in the dictionnary
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
175 else :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
176 unidict[uniprotID].update({ id_type : cor_id })
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
177 elif id_type in ids :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
178 unidict[uniprotID]={id_type : cor_id}
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
179 os.remove(dat_path)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
180
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
181 #print("dat_file ok")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
182
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
183 #add ids from idmapping.dat to the final tab
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
184 for line in tab[1:] :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
185 uniprotID=line[0]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
186 if human :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
187 if uniprotID in unidict :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
188 nextprot = access_dictionary(unidict,uniprotID,'neXtProt')
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
189 if nextprot != '' : nextprot = clean_nextprot_id(nextprot,line[0])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
190 line.extend([nextprot,access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
191 access_dictionary(unidict,uniprotID,'KEGG')])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
192 else :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
193 line.extend(["","","",""])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
194 else :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
195 if uniprotID in unidict :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
196 line.extend([access_dictionary(unidict,uniprotID,'BioGrid'),access_dictionary(unidict,uniprotID,'STRING'),
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
197 access_dictionary(unidict,uniprotID,'KEGG')])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
198 else :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
199 line.extend(["","",""])
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
200
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
201 #print ("tab ok")
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
202
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
203 #add missing nextprot ID for human
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
204 if human :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
205 #build next_dict
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
206 nextprot_ids = id_list_from_nextprot_ftp("nextprot_ac_list_all.txt",target_directory)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
207 next_dict = {}
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
208 for nextid in nextprot_ids :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
209 next_dict[nextid.replace("NX_","")] = nextid
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
210 os.remove(os.path.join(target_directory,"nextprot_ac_list_all.txt"))
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
211
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
212 #add missing nextprot ID
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
213 for line in tab[1:] :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
214 uniprotID=line[0]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
215 nextprotID=line[13]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
216 if nextprotID == '' and uniprotID in next_dict :
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
217 line[13]=next_dict[uniprotID]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
218
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
219 output_file = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") + ".tsv"
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
220 path = os.path.join(target_directory,output_file)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
221
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
222 with open(path,"w") as out :
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
223 w = csv.writer(out,delimiter='\t')
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
224 w.writerows(tab)
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
225
28
d235909789ca planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 27
diff changeset
226 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"}
d235909789ca planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 27
diff changeset
227 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")"
10
2f153b41b6fe planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
dchristiany
parents: 7
diff changeset
228 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
229
10
2f153b41b6fe planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
dchristiany
parents: 7
diff changeset
230 data_table_entry = dict(id=id, name = name, value = species, path = path)
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
231 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping")
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
232
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def download_from_uniprot_ftp(file,target_directory) :
    """Download one id-mapping file from the UniProt FTP server.

    Logs in anonymously on ftp.uniprot.org, moves to the ``by_organism``
    id-mapping directory of the current release and copies ``file`` into
    ``target_directory`` under the same name.

    Parameters:
        file: name of the remote file to retrieve.
        target_directory: local directory that receives the download.

    Returns:
        The path of the downloaded local file.

    Raises:
        Whatever ``ftplib`` or ``open`` raise; the FTP connection is closed
        before the error is propagated.
    """
    ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/"
    path = os.path.join(target_directory, file)
    ftp = ftplib.FTP("ftp.uniprot.org")
    try :
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        # the with-block guarantees the local file is flushed and closed,
        # even when the transfer fails half-way
        with open(path, 'wb') as local_file :
            ftp.retrbinary("RETR " + file, local_file.write)
    except Exception :
        # drop the connection without the QUIT handshake, then propagate
        ftp.close()
        raise
    else :
        ftp.quit()
    return (path)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
242
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def id_list_from_nextprot_ftp(file,target_directory) :
    """Download a neXtProt accession list and return its lines.

    Logs in anonymously on ftp.nextprot.org, retrieves ``file`` from the
    ``ac_lists`` directory of the current release into ``target_directory``
    and reads it back. The downloaded file is left on disk; removing it is
    up to the caller.

    Parameters:
        file: name of the remote accession-list file.
        target_directory: local directory that receives the download.

    Returns:
        A list with one string per line of the downloaded file, line
        endings stripped.

    Raises:
        Whatever ``ftplib`` or ``open`` raise; the FTP connection is closed
        before the error is propagated.
    """
    ftp_dir = "pub/current_release/ac_lists/"
    path = os.path.join(target_directory, file)
    ftp = ftplib.FTP("ftp.nextprot.org")
    try :
        ftp.login("anonymous", "anonymous")
        ftp.cwd(ftp_dir)
        # close the local file before it is reopened for reading below,
        # so that every downloaded byte is flushed to disk first
        with open(path, 'wb') as local_file :
            ftp.retrbinary("RETR " + file, local_file.write)
    except Exception :
        # drop the connection without the QUIT handshake, then propagate
        ftp.close()
        raise
    else :
        ftp.quit()
    with open(path,'r') as id_file :
        nextprot_ids = id_file.read().splitlines()
    return (nextprot_ids)
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
254
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
255 #return '' when key1 or key2 is missing from the dictionary, so that lookups never raise a KeyError
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def access_dictionary (dico,key1,key2) :
    """Look up ``dico[key1][key2]`` without raising on a missing key.

    Parameters:
        dico: two-level mapping (outer key -> inner container).
        key1: key searched in ``dico``.
        key2: key searched in ``dico[key1]``.

    Returns:
        ``dico[key1][key2]`` when both keys are present, otherwise an
        empty string.
    """
    if key1 not in dico :
        return ''
    inner = dico[key1]
    if key2 not in inner :
        return ""
    return inner[key2]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
265
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
266 #if there are several neXtProt IDs for one UniProt accession, return the one matching the accession (NX_<accession>), otherwise the second ID of the list
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def clean_nextprot_id (next_id,uniprotAc) :
    """Reduce a ';'-separated neXtProt ID string to a single ID.

    Parameters:
        next_id: one neXtProt ID, or several joined with ';'.
        uniprotAc: accession used to build the preferred ID
            ``"NX_" + uniprotAc``.

    Returns:
        ``next_id`` unchanged when it holds no ';'. Otherwise the preferred
        ID when it is among the candidates, else the second candidate.
    """
    candidates = next_id.split(";")
    if len(candidates) <= 1 :
        return next_id
    preferred = "NX_" + uniprotAc
    return preferred if preferred in candidates else candidates[1]
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
276
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
277
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
278 #######################################################################################################
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
279 # 4. Build protein interaction maps files
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
280 #######################################################################################################
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
281
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
def get_interactant_name(line, dico=None):
    """Translate the two interactant identifiers of a row into names.

    Parameters:
        line: sequence whose items 0 and 1 are the identifiers of
            interactant A and interactant B.
        dico: optional mapping from identifier to name. When omitted, the
            module-level ``dico_geneid_to_gene_name`` is used, as before.
            NOTE(review): that global is not defined in the part of the
            file reviewed here -- confirm it exists before relying on the
            default.

    Returns:
        A tuple ``(interactant_A, interactant_B)``; an identifier missing
        from the mapping is reported as ``"NA"``.
    """
    if dico is None :
        dico = dico_geneid_to_gene_name

    # the former debug print of every matched identifier was removed: it
    # flooded stdout and was a Python-2-only statement
    interactant_A = dico.get(line[0], "NA")
    interactant_B = dico.get(line[1], "NA")

    return interactant_A, interactant_B
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
296
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
297 def PPI_ref_files(data_manager_dict, species, interactome, target_directory):
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
298
28
d235909789ca planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 27
diff changeset
299 species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"}
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
300
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
301 ##BioGRID
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
302 if interactome=="biogrid":
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
303
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
304 tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip"
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
305
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
306 #download zip file
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
307 r = requests.get(tab2_link)
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
308 with open("BioGRID.zip", "wb") as code:
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
309 code.write(r.content)
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
310
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
311 #unzip files
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
312 with zipfile.ZipFile("BioGRID.zip", 'r') as zip_ref:
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
313 if not os.path.exists("tmp_BioGRID"): os.makedirs("tmp_BioGRID")
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
314 zip_ref.extractall("tmp_BioGRID")
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
315
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
316 #import file of interest and build dictionary
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
317 file_path="tmp_BioGRID/BIOGRID-ORGANISM-"+species_dict[species].replace(" ","_")+"-3.5.167.tab2.txt"
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
318 with open(file_path,"r") as handle :
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
319 tab_file = csv.reader(handle,delimiter="\t")
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
320 dico_network = {}
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
321 GeneID_index=1
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
322 network_cols=[1,2,7,8,11,12,14,18,20]
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
323 for line in tab_file :
22
778cc97cb115 planemo upload commit e2e4ac8c2c4dc5675d4c415ff192a925ca5e6b98-dirty
dchristiany
parents: 21
diff changeset
324 if line[GeneID_index] not in dico_network:
778cc97cb115 planemo upload commit e2e4ac8c2c4dc5675d4c415ff192a925ca5e6b98-dirty
dchristiany
parents: 21
diff changeset
325 dico_network[line[GeneID_index]]=[[line[i] for i in network_cols]]
778cc97cb115 planemo upload commit e2e4ac8c2c4dc5675d4c415ff192a925ca5e6b98-dirty
dchristiany
parents: 21
diff changeset
326 else:
778cc97cb115 planemo upload commit e2e4ac8c2c4dc5675d4c415ff192a925ca5e6b98-dirty
dchristiany
parents: 21
diff changeset
327 dico_network[line[GeneID_index]].append([line[i] for i in network_cols])
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
328
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
329 #delete tmp_BioGRID directory
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
330 os.remove("BioGRID.zip")
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
331 shutil.rmtree("tmp_BioGRID", ignore_errors=True)
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
332
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
333 #download NCBI2Reactome.txt file and build dictionary
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
334 with requests.Session() as s:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
335 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
40
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
336 r.encoding ="utf-8"
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
337 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
338
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
339 dico_nodes = {}
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
340 geneid_index=0
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
341 pathway_description_index=3
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
342 species_index=5
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
343 for line in tab_file :
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
344 if line[species_index]==species_dict[species]:
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
345 if line[geneid_index] in dico_nodes :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
346 dico_nodes[line[geneid_index]].append(line[pathway_description_index])
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
347 else :
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
348 dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
349
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
350 dico={}
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
351 dico['network']=dico_network
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
352 dico['nodes']=dico_nodes
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
353
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
354 ##Bioplex
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
355 elif interactome=="bioplex":
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
356
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
357 with requests.Session() as s:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
358 r = s.get('http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv')
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
359 r = r.content.decode('utf-8')
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
360 bioplex = csv.reader(r.splitlines(), delimiter='\t')
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
361
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
362 dico_network = {}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
363 dico_network["GeneID"]={}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
364 network_geneid_cols=[0,1,4,5,8]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
365 dico_network["UniProt-AC"]={}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
366 network_uniprot_cols=[2,3,4,5,8]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
367 dico_GeneID_to_UniProt = {}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
368 for line in bioplex :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
369 if line[0] not in dico_network["GeneID"]:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
370 dico_network["GeneID"][line[0]]=[[line[i] for i in network_geneid_cols]]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
371 else :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
372 dico_network["GeneID"][line[0]].append([line[i] for i in network_geneid_cols])
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
373 if line[1] not in dico_network["UniProt-AC"]:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
374 dico_network["UniProt-AC"][line[2]]=[[line[i] for i in network_uniprot_cols]]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
375 else:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
376 dico_network["UniProt-AC"][line[2]].append([line[i] for i in network_uniprot_cols])
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
377 dico_GeneID_to_UniProt[line[0]]=line[2]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
378
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
379 with requests.Session() as s:
40
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
380 r = s.get('https://reactome.org/download/current/UniProt2Reactome.txt')
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
381 r.encoding ="utf-8"
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
382 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
383
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
384 dico_nodes_uniprot = {}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
385 uniProt_index=0
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
386 pathway_description_index=3
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
387 species_index=5
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
388 for line in tab_file :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
389 if line[species_index]==species_dict[species]:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
390 if line[uniProt_index] in dico_nodes_uniprot :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
391 dico_nodes_uniprot[line[uniProt_index]].append(line[pathway_description_index])
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
392 else :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
393 dico_nodes_uniprot[line[uniProt_index]] = [line[pathway_description_index]]
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
394
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
395 with requests.Session() as s:
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
396 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
40
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
397 r.encoding ="utf-8"
fddf4a3847f4 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 39
diff changeset
398 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
399
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
400 dico_nodes_geneid = {}
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
401 geneid_index=0
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
402 pathway_description_index=3
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
403 species_index=5
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
404 for line in tab_file :
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
405 if line[species_index]==species_dict[species]:
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
406 if line[geneid_index] in dico_nodes_geneid :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
407 dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index])
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
408 else :
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
409 dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]]
39
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
410
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
411 dico={}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
412 dico_nodes={}
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
413 dico_nodes['GeneID']=dico_nodes_geneid
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
414 dico_nodes['UniProt-AC']=dico_nodes_uniprot
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
415 dico['network']=dico_network
ec6252ad1a8e planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 36
diff changeset
416 dico['nodes']=dico_nodes
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
417 dico['convert']=dico_GeneID_to_UniProt
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
418
46
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
419 ##Humap
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
420 elif interactome=="humap":
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
421
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
422 with requests.Session() as s:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
423 r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
424 r = r.content.decode('utf-8')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
425 humap_nodes = csv.reader(r.splitlines(), delimiter=',')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
426
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
427 dico_geneid_to_gene_name={}
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
428 for line in humap_nodes :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
429 if check_entrez_geneid(line[5]):
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
430 if line[5] not in dico_geneid_to_gene_name:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
431 dico_geneid_to_gene_name[line[5]]=[line[4]]
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
432 else :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
433 if line[4] not in dico_geneid_to_gene_name[line[5]] :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
434 dico_geneid_to_gene_name[line[5]].append(line[4])
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
435
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
436 with requests.Session() as s:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
437 r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
438 r = r.content.decode('utf-8')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
439 humap = csv.reader(r.splitlines(), delimiter='\t')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
440
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
441 dico_network = {}
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
442 for line in humap :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
443 if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]):
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
444
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
445 interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name)
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
446
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
447 if line[0] not in dico_network:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
448 dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]]
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
449 else :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
450 dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]])
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
451
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
452 with requests.Session() as s:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
453 r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
454 r.encoding ="utf-8"
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
455 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
456
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
457 dico_nodes = {}
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
458 geneid_index=0
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
459 pathway_description_index=3
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
460 species_index=5
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
461 for line in tab_file :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
462 if line[species_index]==species_dict[species]:
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
463 #Fill dictionary with pathways
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
464 if line[geneid_index] in dico_nodes :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
465 dico_nodes[line[geneid_index]].append(line[pathway_description_index])
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
466 else :
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
467 dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
468
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
469 dico={}
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
470 dico['network']=dico_network
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
471 dico['nodes']=dico_nodes
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
472 dico['gene_name']=dico_geneid_to_gene_name
80fc0b28e227 planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
dchristiany
parents: 43
diff changeset
473
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
474 #writing output
42
5a37a086c9a8 planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 41
diff changeset
475 output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json"
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
476 path = os.path.join(target_directory,output_file)
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
477 name = species+" ("+species_dict[species]+") "+time.strftime("%d/%m/%Y")
27
592c59530c32 planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
dchristiany
parents: 26
diff changeset
478 id = species+"_"+interactome+"_"+ time.strftime("%d-%m-%Y")
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
479
41
4062484f2cee planemo upload commit 43e2a01d7519104c2c16510e4dbdc023e65c49c7-dirty
dchristiany
parents: 40
diff changeset
480 with open(path, 'w') as handle:
15
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
481 json.dump(dico, handle, sort_keys=True)
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
482
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
483 data_table_entry = dict(id=id, name = name, value = species, path = path)
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
484 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_"+interactome+"_dictionaries")
83f57ba70416 planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
dchristiany
parents: 13
diff changeset
485
0
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
486
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
487 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
488 # Main function
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
489 #######################################################################################################
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
def main():
    """Command-line entry point of the data manager.

    Parses the command line, reads the parameter JSON file given by
    ``-o/--output``, makes sure the ``extra_files_path`` directory it names
    exists, builds every requested resource into that directory and finally
    overwrites the same ``--output`` file with the collected data table
    entries serialised as JSON.

    Options (all but ``--output`` are optional):
        --hpa           comma-separated values, each passed to ``HPA_sources``
        --peptideatlas  comma-separated values, each passed (with ``--date``)
                        to ``peptide_atlas_sources``
        --id_mapping    comma-separated species, each passed to
                        ``id_mapping_sources``
        --interactome   PPI source name passed to ``PPI_ref_files``
        --species       species used when ``--interactome`` is "biogrid";
                        every other interactome is built for "Human"
        --date          forwarded to ``peptide_atlas_sources``
        -o/--output     parameter file read at start-up and rewritten at the end
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--hpa", metavar=("HPA_OPTION"))
    parser.add_argument("--peptideatlas", metavar=("SAMPLE_CATEGORY_ID"))
    parser.add_argument("--id_mapping", metavar=("ID_MAPPING_SPECIES"))
    parser.add_argument("--interactome", metavar=("PPI"))
    parser.add_argument("--species")
    parser.add_argument("--date")
    # Without --output there is nothing to read or write: let argparse report
    # it instead of failing later with an opaque TypeError on open(None).
    parser.add_argument("-o", "--output", required=True)
    args = parser.parse_args()

    data_manager_dict = {}

    # Extract json file params
    filename = args.output
    with open(filename) as handle:
        params = from_json_string(handle.read())
    target_directory = params['output_data'][0]['extra_files_path']
    # Tolerate an already existing directory (e.g. a re-run) and create any
    # missing parent; os.makedirs(exist_ok=...) is avoided to stay Python 2
    # compatible.
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)

    # NOTE: argparse always defines the attributes below and sets them to
    # None when the option is absent, so a plain None test is sufficient.

    ## Download source files from HPA
    if args.hpa is not None:
        for hpa_tissue in args.hpa.split(","):
            HPA_sources(data_manager_dict, hpa_tissue, target_directory)

    ## Download source file from Peptide Atlas query
    if args.peptideatlas is not None:
        for pa_tissue in args.peptideatlas.split(","):
            peptide_atlas_sources(data_manager_dict, pa_tissue, args.date, target_directory)

    ## Download ID_mapping source file from Uniprot
    if args.id_mapping is not None:
        for species in args.id_mapping.split(","):
            id_mapping_sources(data_manager_dict, species, target_directory)

    ## Download PPI ref files from biogrid/bioplex/humap
    interactome = args.interactome
    if interactome is not None:
        # Only biogrid takes the species from the command line; the other
        # interactomes are always built for Human.
        species = args.species if interactome == "biogrid" else "Human"
        if species is not None:
            PPI_ref_files(data_manager_dict, species, interactome, target_directory)

    # Save info to json file. Text mode: to_json_string is expected to return
    # a text string, which a 'wb' handle rejects on Python 3.
    with open(filename, 'w') as handle:
        handle.write(to_json_string(data_manager_dict))
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
557
2de84fea8367 planemo upload commit d703392579d96e480c6461ce679516b12cefb3de-dirty
dchristiany
parents:
diff changeset
# Run the data manager only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()