Mercurial > repos > dchristiany > data_manager_proteore
comparison data_manager/resource_building.py @ 20:d2af5a5053fd draft
planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
author | dchristiany |
---|---|
date | Tue, 15 Jan 2019 10:00:02 -0500 |
parents | 85532a48e4e4 |
children | 0a79066992fc |
comparison legend:
- equal
- deleted
- inserted
- replaced
19:85532a48e4e4 | 20:d2af5a5053fd |
---|---|
1 # -*- coding: utf-8 -*- | 1 # -*- coding: utf-8 -*- |
2 """ | 2 """ |
3 The purpose of this script is to create source files from different databases to be used in other proteore tools | 3 The purpose of this script is to create source files from different databases to be used in other proteore tools |
4 """ | 4 """ |
5 | 5 |
6 import os, sys, argparse, requests, time, csv, re, json, zipfile, shutil | 6 import os, sys, argparse, requests, time, csv, re, json, shutil |
7 from io import BytesIO | 7 from io import BytesIO |
8 from zipfile import ZipFile | 8 from zipfile import ZipFile |
9 from galaxy.util.json import from_json_string, to_json_string | 9 from galaxy.util.json import from_json_string, to_json_string |
10 | 10 |
11 ####################################################################################################### | 11 ####################################################################################################### |
306 #delete tmp_BioGRID directory | 306 #delete tmp_BioGRID directory |
307 os.remove("BioGRID.zip") | 307 os.remove("BioGRID.zip") |
308 shutil.rmtree("tmp_BioGRID", ignore_errors=True) | 308 shutil.rmtree("tmp_BioGRID", ignore_errors=True) |
309 | 309 |
310 #download NCBI2Reactome.txt file and build dictionary | 310 #download NCBI2Reactome.txt file and build dictionary |
311 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') | 311 r = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') |
312 decoded_content = download.content.decode('utf-8') | 312 r.encoding ="utf-8" |
313 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') | 313 tab_file = csv.reader(r.content.splitlines(), delimiter='\t') |
314 dico_nodes = {} | 314 dico_nodes = {} |
315 GeneID_index=0 | 315 GeneID_index=0 |
316 pathway_description_index=3 | 316 pathway_description_index=3 |
317 species_index=5 | 317 species_index=5 |
318 for line in tab_file : | 318 for line in tab_file : |
327 dico['nodes']=dico_nodes | 327 dico['nodes']=dico_nodes |
328 | 328 |
329 ##Bioplex | 329 ##Bioplex |
330 elif interactome=="bioplex": | 330 elif interactome=="bioplex": |
331 | 331 |
332 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") | 332 r = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") |
333 decoded_content = download.content.decode('utf-8') | 333 r.encoding ="utf-8" |
334 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t') | 334 bioplex = csv.reader(r.content.splitlines(), delimiter='\t') |
335 dico_network = {} | 335 dico_network = {} |
336 dico_network["GeneID"]={} | 336 dico_network["GeneID"]={} |
337 network_geneid_cols=[0,1,4,5,8] | 337 network_geneid_cols=[0,1,4,5,8] |
338 dico_network["UniProt-AC"]={} | 338 dico_network["UniProt-AC"]={} |
339 network_uniprot_cols=[2,3,4,5,8] | 339 network_uniprot_cols=[2,3,4,5,8] |
342 for line in bioplex : | 342 for line in bioplex : |
343 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] | 343 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] |
344 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] | 344 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] |
345 dico_GeneID_to_UniProt[line[0]]=line[2] | 345 dico_GeneID_to_UniProt[line[0]]=line[2] |
346 | 346 |
347 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") | 347 r = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") |
348 decoded_content = download.content.decode('utf-8') | 348 r.encoding ="utf-8" |
349 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') | 349 tab_file = csv.reader(r.content.splitlines(), delimiter='\t') |
350 dico_nodes = {} | 350 dico_nodes = {} |
351 uniProt_index=0 | 351 uniProt_index=0 |
352 pathway_description_index=3 | 352 pathway_description_index=3 |
353 species_index=5 | 353 species_index=5 |
354 for line in tab_file : | 354 for line in tab_file : |