# HG changeset patch # User dchristiany # Date 1549034518 18000 # Node ID 80fc0b28e227c6bf0fd35a4f488bbedbc0dd50c5 # Parent ec7a4d773c457ea5868f911fe85098cfd96c4d3d planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty diff -r ec7a4d773c45 -r 80fc0b28e227 data_manager/resource_building.py --- a/data_manager/resource_building.py Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager/resource_building.py Fri Feb 01 10:21:58 2019 -0500 @@ -113,6 +113,13 @@ else : return False +def check_entrez_geneid (id) : + entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+") + if entrez_pattern.match(id) : + return True + else : + return False + ####################################################################################################### # 3. ID mapping file ####################################################################################################### @@ -272,6 +279,21 @@ # 4. Build protein interaction maps files ####################################################################################################### +def get_interactant_name(line, dico_geneid_to_gene_name): + + if line[0] in dico_geneid_to_gene_name : + print line[0] + interactant_A = dico_geneid_to_gene_name[line[0]] + else : + interactant_A = "NA" + + if line[1] in dico_geneid_to_gene_name : + interactant_B = dico_geneid_to_gene_name[line[1]] + else : + interactant_B = "NA" + + return interactant_A, interactant_B + def PPI_ref_files(data_manager_dict, species, interactome, target_directory): species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"} @@ -315,15 +337,15 @@ tab_file = csv.reader(r.content.splitlines(), delimiter='\t') dico_nodes = {} - uniProt_index=0 + geneid_index=0 pathway_description_index=3 species_index=5 for line in tab_file : if line[species_index]==species_dict[species]: - if line[uniProt_index] in dico_nodes : - dico_nodes[line[uniProt_index]].append(line[pathway_description_index]) + if line[geneid_index] in dico_nodes : + 
dico_nodes[line[geneid_index]].append(line[pathway_description_index]) else : - dico_nodes[line[uniProt_index]] = [line[pathway_description_index]] + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] dico={} dico['network']=dico_network @@ -376,15 +398,15 @@ tab_file = csv.reader(r.content.splitlines(), delimiter='\t') dico_nodes_geneid = {} - uniProt_index=0 + geneid_index=0 pathway_description_index=3 species_index=5 for line in tab_file : if line[species_index]==species_dict[species]: - if line[uniProt_index] in dico_nodes_geneid : - dico_nodes_geneid[line[uniProt_index]].append(line[pathway_description_index]) + if line[geneid_index] in dico_nodes_geneid : + dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index]) else : - dico_nodes_geneid[line[uniProt_index]] = [line[pathway_description_index]] + dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]] dico={} dico_nodes={} @@ -394,6 +416,61 @@ dico['nodes']=dico_nodes dico['convert']=dico_GeneID_to_UniProt + ##Humap + elif interactome=="humap": + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt') + r = r.content.decode('utf-8') + humap_nodes = csv.reader(r.splitlines(), delimiter=',') + + dico_geneid_to_gene_name={} + for line in humap_nodes : + if check_entrez_geneid(line[5]): + if line[5] not in dico_geneid_to_gene_name: + dico_geneid_to_gene_name[line[5]]=[line[4]] + else : + if line[4] not in dico_geneid_to_gene_name[line[5]] : + dico_geneid_to_gene_name[line[5]].append(line[4]) + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt') + r = r.content.decode('utf-8') + humap = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + for line in humap : + if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]): + + interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name) + + if line[0] not in 
dico_network: + dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]] + else : + dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]]) + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding ="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + #Fill dictionary with pathways + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['gene_name']=dico_geneid_to_gene_name + #writing output output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json" path = os.path.join(target_directory,output_file) @@ -464,7 +541,10 @@ ## Download PPI ref files from biogrid/bioplex/humap try: interactome=args.interactome - species=args.species + if interactome == "biogrid" : + species=args.species + else : + species="Human" except NameError: interactome=None species=None diff -r ec7a4d773c45 -r 80fc0b28e227 data_manager/resource_building.xml --- a/data_manager/resource_building.xml Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager/resource_building.xml Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ - + to create or update reference files for proteore tools @@ -18,8 +18,10 @@ #else if $database.database == "id_mapping" --id_mapping="$database.species" #else if $database.database == "PPI" - --species="$database.species" - --interactome="$database.interactome" + --interactome="$database.base.interactome" + #if $database.base.interactome == "biogrid" + --species="$database.base.species" + #end if #end if --output "$output" @@ -71,16 +73,22 @@ - - - - - - - - - - + + + + + + + + + + + + 
+ + + + diff -r ec7a4d773c45 -r 80fc0b28e227 data_manager_conf.xml --- a/data_manager_conf.xml Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager_conf.xml Fri Feb 01 10:21:58 2019 -0500 @@ -75,5 +75,20 @@ + + + + + + + + + PPI_dictionaries/ + + ${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json + abspath + + + diff -r ec7a4d773c45 -r 80fc0b28e227 tool-data/proteore_biogrid_dictionaries.loc.sample --- a/tool-data/proteore_biogrid_dictionaries.loc.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool-data/proteore_biogrid_dictionaries.loc.sample Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ #id name value path -#biogrid_human_08-01-2019 Human (Homo sapiens) human PPI_dictionaries/human_biogrid_dict.json -#biogrid_mouse_08-01-2019 Mouse (Mus musculus) mouse PPI_dictionaries/mouse_biogrid_dict.json -#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) rat PPI_dictionaries/rat_biogrid_dict.json +#biogrid_human_08-01-2019 Human (Homo sapiens) Human PPI_dictionaries/Human_biogrid.json +#biogrid_mouse_08-01-2019 Mouse (Mus musculus) Mouse PPI_dictionaries/Mouse_biogrid.json +#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) Rat PPI_dictionaries/Rat_biogrid.json diff -r ec7a4d773c45 -r 80fc0b28e227 tool-data/proteore_bioplex_dictionaries.loc.sample --- a/tool-data/proteore_bioplex_dictionaries.loc.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool-data/proteore_bioplex_dictionaries.loc.sample Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ #id name value path -#bioplex_human_08-01-2019 Human (Homo sapiens) human PPI_dictionaries/human_bioplex_dict.json -#bioplex_mouse_08-01-2019 Mouse (Mus musculus) mouse PPI_dictionaries/mouse_bioplex_dict.json -#bioplex_rat_08-01-2019 Rat (Rattus norvegicus) rat PPI_dictionaries/rat_bioplex_dict.json +#bioplex_human_08-01-2019 Human (Homo sapiens) Human PPI_dictionaries/human_bioplex.json +#bioplex_mouse_08-01-2019 Mouse (Mus musculus) Mouse PPI_dictionaries/mouse_bioplex.json +#bioplex_rat_08-01-2019 Rat (Rattus norvegicus) Rat 
PPI_dictionaries/rat_bioplex.json diff -r ec7a4d773c45 -r 80fc0b28e227 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool_data_table_conf.xml.sample Fri Feb 01 10:21:58 2019 -0500 @@ -20,4 +20,8 @@ id, name, value, path + + id, name, value, path + +