Mercurial > repos > dchristiany > data_manager_proteore
comparison data_manager/resource_building.py @ 17:c11d5bdc22e0 draft
planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
author | dchristiany |
---|---|
date | Tue, 15 Jan 2019 05:16:11 -0500 |
parents | 454c2e2984ea |
children | 85532a48e4e4 |
comparison legend: equal | deleted | inserted | replaced
16:454c2e2984ea | 17:c11d5bdc22e0 |
---|---|
306 os.remove("BioGRID.zip") | 306 os.remove("BioGRID.zip") |
307 shutil.rmtree("tmp_BioGRID", ignore_errors=True) | 307 shutil.rmtree("tmp_BioGRID", ignore_errors=True) |
308 | 308 |
309 #download NCBI2Reactome.txt file and build dictionary | 309 #download NCBI2Reactome.txt file and build dictionary |
310 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') | 310 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') |
311 decoded_content = download.content.decode('us-ascii') | 311 decoded_content = download.content.decode('utf-8') |
312 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') | 312 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') |
313 dico_nodes = {} | 313 dico_nodes = {} |
314 GeneID_index=0 | 314 GeneID_index=0 |
315 pathway_description_index=3 | 315 pathway_description_index=3 |
316 species_index=5 | 316 species_index=5 |
327 | 327 |
328 ##Bioplex | 328 ##Bioplex |
329 elif interactome=="bioplex": | 329 elif interactome=="bioplex": |
330 | 330 |
331 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") | 331 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") |
332 decoded_content = download.content.decode('us-ascii') | 332 decoded_content = download.content.decode('utf-8') |
333 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t') | 333 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t') |
334 dico_network = {} | 334 dico_network = {} |
335 dico_network["GeneID"]={} | 335 dico_network["GeneID"]={} |
336 network_geneid_cols=[0,1,4,5,8] | 336 network_geneid_cols=[0,1,4,5,8] |
337 dico_network["UniProt-AC"]={} | 337 dico_network["UniProt-AC"]={} |
342 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] | 342 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] |
343 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] | 343 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] |
344 dico_GeneID_to_UniProt[line[0]]=line[2] | 344 dico_GeneID_to_UniProt[line[0]]=line[2] |
345 | 345 |
346 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") | 346 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") |
347 decoded_content = download.content.decode('us-ascii') | 347 decoded_content = download.content.decode('utf-8') |
348 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') | 348 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') |
349 dico_nodes = {} | 349 dico_nodes = {} |
350 uniProt_index=0 | 350 uniProt_index=0 |
351 pathway_description_index=3 | 351 pathway_description_index=3 |
352 species_index=5 | 352 species_index=5 |