comparison data_manager/resource_building.py @ 20:d2af5a5053fd draft

planemo upload commit 8040003119a3d54866ec6ee9b9f659f2af554817-dirty
author dchristiany
date Tue, 15 Jan 2019 10:00:02 -0500
parents 85532a48e4e4
children 0a79066992fc
comparison
equal deleted inserted replaced
19:85532a48e4e4 20:d2af5a5053fd
1 # -*- coding: utf-8 -*- 1 # -*- coding: utf-8 -*-
2 """ 2 """
3 The purpose of this script is to create source files from different databases to be used in other proteore tools 3 The purpose of this script is to create source files from different databases to be used in other proteore tools
4 """ 4 """
5 5
6 import os, sys, argparse, requests, time, csv, re, json, zipfile, shutil 6 import os, sys, argparse, requests, time, csv, re, json, shutil
7 from io import BytesIO 7 from io import BytesIO
8 from zipfile import ZipFile 8 from zipfile import ZipFile
9 from galaxy.util.json import from_json_string, to_json_string 9 from galaxy.util.json import from_json_string, to_json_string
10 10
11 ####################################################################################################### 11 #######################################################################################################
306 #delete tmp_BioGRID directory 306 #delete tmp_BioGRID directory
307 os.remove("BioGRID.zip") 307 os.remove("BioGRID.zip")
308 shutil.rmtree("tmp_BioGRID", ignore_errors=True) 308 shutil.rmtree("tmp_BioGRID", ignore_errors=True)
309 309
310 #download NCBI2Reactome.txt file and build dictionary 310 #download NCBI2Reactome.txt file and build dictionary
311 download = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') 311 r = requests.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
312 decoded_content = download.content.decode('utf-8') 312 r.encoding ="utf-8"
313 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') 313 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
314 dico_nodes = {} 314 dico_nodes = {}
315 GeneID_index=0 315 GeneID_index=0
316 pathway_description_index=3 316 pathway_description_index=3
317 species_index=5 317 species_index=5
318 for line in tab_file : 318 for line in tab_file :
327 dico['nodes']=dico_nodes 327 dico['nodes']=dico_nodes
328 328
329 ##Bioplex 329 ##Bioplex
330 elif interactome=="bioplex": 330 elif interactome=="bioplex":
331 331
332 download = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv") 332 r = requests.get("http://bioplex.hms.harvard.edu/data/BioPlex_interactionList_v4a.tsv")
333 decoded_content = download.content.decode('utf-8') 333 r.encoding ="utf-8"
334 bioplex = csv.reader(decoded_content.splitlines(), delimiter='\t') 334 bioplex = csv.reader(r.content.splitlines(), delimiter='\t')
335 dico_network = {} 335 dico_network = {}
336 dico_network["GeneID"]={} 336 dico_network["GeneID"]={}
337 network_geneid_cols=[0,1,4,5,8] 337 network_geneid_cols=[0,1,4,5,8]
338 dico_network["UniProt-AC"]={} 338 dico_network["UniProt-AC"]={}
339 network_uniprot_cols=[2,3,4,5,8] 339 network_uniprot_cols=[2,3,4,5,8]
342 for line in bioplex : 342 for line in bioplex :
343 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols] 343 dico_network["GeneID"][line[0]]=[line[i] for i in network_geneid_cols]
344 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols] 344 dico_network["UniProt-AC"][line[2]]=[line[i] for i in network_uniprot_cols]
345 dico_GeneID_to_UniProt[line[0]]=line[2] 345 dico_GeneID_to_UniProt[line[0]]=line[2]
346 346
347 download = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt") 347 r = requests.get("https://reactome.org/download/current/UniProt2Reactome.txt")
348 decoded_content = download.content.decode('utf-8') 348 r.encoding ="utf-8"
349 tab_file = csv.reader(decoded_content.splitlines(), delimiter='\t') 349 tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
350 dico_nodes = {} 350 dico_nodes = {}
351 uniProt_index=0 351 uniProt_index=0
352 pathway_description_index=3 352 pathway_description_index=3
353 species_index=5 353 species_index=5
354 for line in tab_file : 354 for line in tab_file :