comparison data_manager/resource_building.py @ 17:c11d5bdc22e0 draft

planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
author dchristiany
date Tue, 15 Jan 2019 05:16:11 -0500
parents 454c2e2984ea
children 85532a48e4e4
comparison
equal deleted inserted replaced
16:454c2e2984ea 17:c11d5bdc22e0
306 os.remove("BioGRID.zip") 306 os.remove("BioGRID.zip")
307 shutil.rmtree("tmp_BioGRID", ignore_errors=True) 307 shutil.rmtree("tmp_BioGRID", ignore_errors=True)
308 308
309 #download NCBI2Reactome.txt file and build dictionary 309 #download NCBI2Reactome.txt file and build dictionary
310 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') 310 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
311 decoded_content = download.content.decode('us-ascii') 311 decoded_content = download.content.decode('utf-8')
312 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') 312 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t')
313 dico_nodes = {} 313 dico_nodes = {}
314 GeneID_index=0 314 GeneID_index=0
315 pathway_description_index=3 315 pathway_description_index=3
316 species_index=5 316 species_index=5
327 327
328 ##Bioplex 328 ##Bioplex
329 elif interactome=="bioplex": 329 elif interactome=="bioplex":
330 330
331 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") 331 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv")
332 decoded_content = download.content.decode('us-ascii') 332 decoded_content = download.content.decode('utf-8')
333 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t') 333 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t')
334 dico_network = {} 334 dico_network = {}
335 dico_network["GeneID"]={} 335 dico_network["GeneID"]={}
336 network_geneid_cols=[0,1,4,5,8] 336 network_geneid_cols=[0,1,4,5,8]
337 dico_network["UniProt-AC"]={} 337 dico_network["UniProt-AC"]={}
342 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] 342 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols]
343 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] 343 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols]
344 dico_GeneID_to_UniProt[line[0]]=line[2] 344 dico_GeneID_to_UniProt[line[0]]=line[2]
345 345
346 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") 346 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt")
347 decoded_content = download.content.decode('us-ascii') 347 decoded_content = download.content.decode('utf-8')
348 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') 348 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t')
349 dico_nodes = {} 349 dico_nodes = {}
350 uniProt_index=0 350 uniProt_index=0
351 pathway_description_index=3 351 pathway_description_index=3
352 species_index=5 352 species_index=5