Mercurial > repos > dchristiany > data_manager_proteore
comparison data_manager/resource_building.py @ 28:d235909789ca draft
planemo upload commit e694c4b0df30a4286ba09721696e8ec3af25fd97-dirty
author | dchristiany |
---|---|
date | Fri, 25 Jan 2019 09:38:35 -0500 |
parents | 592c59530c32 |
children | 871a7347ca24 |
comparison
legend: equal, deleted, inserted, replaced
27:592c59530c32 | 28:d235909789ca |
---|---|
121 import ftplib, gzip | 121 import ftplib, gzip |
122 csv.field_size_limit(sys.maxsize) # to handle big files | 122 csv.field_size_limit(sys.maxsize) # to handle big files |
123 | 123 |
124 def id_mapping_sources (data_manager_dict, species, target_directory) : | 124 def id_mapping_sources (data_manager_dict, species, target_directory) : |
125 | 125 |
126 human = species == "human" | 126 human = species == "Human" |
127 species_dict = { "human" : "HUMAN_9606", "mouse" : "MOUSE_10090", "rat" : "RAT_10116" } | 127 species_dict = { "Human" : "HUMAN_9606", "Mouse" : "MOUSE_10090", "Rat" : "RAT_10116" } |
128 files=["idmapping_selected.tab.gz","idmapping.dat.gz"] | 128 files=["idmapping_selected.tab.gz","idmapping.dat.gz"] |
129 | 129 |
130 #header | 130 #header |
131 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] | 131 if human : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","neXtProt","BioGrid","STRING","KEGG"]] |
132 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] | 132 else : tab = [["UniProt-AC","UniProt-ID","GeneID","RefSeq","GI","PDB","GO","PIR","MIM","UniGene","Ensembl_Gene","Ensembl_Transcript","Ensembl_Protein","BioGrid","STRING","KEGG"]] |
216 | 216 |
217 with open(path,"w") as out : | 217 with open(path,"w") as out : |
218 w = csv.writer(out,delimiter='\t') | 218 w = csv.writer(out,delimiter='\t') |
219 w.writerows(tab) | 219 w.writerows(tab) |
220 | 220 |
221 name_dict={"human" : "Homo sapiens", "mouse" : "Mus musculus", "rat" : "Rattus norvegicus"} | 221 name_dict={"Human" : "Homo sapiens", "Mouse" : "Mus musculus", "Rat" : "Rattus norvegicus"} |
222 name = name_dict[species]+" "+time.strftime("%d/%m/%Y") | 222 name = species +" (" + name_dict[species]+" "+time.strftime("%d/%m/%Y")+")" |
223 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") | 223 id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") |
224 | 224 |
225 data_table_entry = dict(id=id, name = name, value = species, path = path) | 225 data_table_entry = dict(id=id, name = name, value = species, path = path) |
226 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping") | 226 _add_data_table_entry(data_manager_dict, data_table_entry, "proteore_id_mapping") |
227 | 227 |
274 # 4. Build protein interaction maps files | 274 # 4. Build protein interaction maps files |
275 ####################################################################################################### | 275 ####################################################################################################### |
276 | 276 |
277 def PPI_ref_files(data_manager_dict, species, interactome, target_directory): | 277 def PPI_ref_files(data_manager_dict, species, interactome, target_directory): |
278 | 278 |
279 species_dict={'human':'Homo sapiens',"mouse":"Mus musculus","rat":"Rattus norvegicus"} | 279 species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"} |
280 | 280 |
281 ##BioGRID | 281 ##BioGRID |
282 if interactome=="biogrid": | 282 if interactome=="biogrid": |
283 | 283 |
284 tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip" | 284 tab2_link="https://downloads.thebiogrid.org/Download/BioGRID/Release-Archive/BIOGRID-3.5.167/BIOGRID-ORGANISM-3.5.167.tab2.zip" |