Mercurial > repos > dchristiany > data_manager_proteore
changeset 46:80fc0b28e227 draft
planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
author | dchristiany |
---|---|
date | Fri, 01 Feb 2019 10:21:58 -0500 |
parents | ec7a4d773c45 |
children | 7b486b0fba4e |
files | data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/proteore_biogrid_dictionaries.loc.sample tool-data/proteore_bioplex_dictionaries.loc.sample tool_data_table_conf.xml.sample |
diffstat | 6 files changed, 135 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/resource_building.py Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager/resource_building.py Fri Feb 01 10:21:58 2019 -0500 @@ -113,6 +113,13 @@ else : return False +def check_entrez_geneid (id) : + entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+") + if entrez_pattern.match(id) : + return True + else : + return False + ####################################################################################################### # 3. ID mapping file ####################################################################################################### @@ -272,6 +279,21 @@ # 4. Build protein interaction maps files ####################################################################################################### +def get_interactant_name(line, dico_geneid_to_gene_name): + + if line[0] in dico_geneid_to_gene_name : + print line[0] + interactant_A = dico_geneid_to_gene_name[line[0]] + else : + interactant_A = "NA" + + if line[1] in dico_geneid_to_gene_name : + interactant_B = dico_geneid_to_gene_name[line[1]] + else : + interactant_B = "NA" + + return interactant_A, interactant_B + def PPI_ref_files(data_manager_dict, species, interactome, target_directory): species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"} @@ -315,15 +337,15 @@ tab_file = csv.reader(r.content.splitlines(), delimiter='\t') dico_nodes = {} - uniProt_index=0 + geneid_index=0 pathway_description_index=3 species_index=5 for line in tab_file : if line[species_index]==species_dict[species]: - if line[uniProt_index] in dico_nodes : - dico_nodes[line[uniProt_index]].append(line[pathway_description_index]) + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) else : - dico_nodes[line[uniProt_index]] = [line[pathway_description_index]] + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] dico={} dico['network']=dico_network @@ -376,15 +398,15 @@ tab_file = 
csv.reader(r.content.splitlines(), delimiter='\t') dico_nodes_geneid = {} - uniProt_index=0 + geneid_index=0 pathway_description_index=3 species_index=5 for line in tab_file : if line[species_index]==species_dict[species]: - if line[uniProt_index] in dico_nodes_geneid : - dico_nodes_geneid[line[uniProt_index]].append(line[pathway_description_index]) + if line[geneid_index] in dico_nodes_geneid : + dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index]) else : - dico_nodes_geneid[line[uniProt_index]] = [line[pathway_description_index]] + dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]] dico={} dico_nodes={} @@ -394,6 +416,61 @@ dico['nodes']=dico_nodes dico['convert']=dico_GeneID_to_UniProt + ##Humap + elif interactome=="humap": + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt') + r = r.content.decode('utf-8') + humap_nodes = csv.reader(r.splitlines(), delimiter=',') + + dico_geneid_to_gene_name={} + for line in humap_nodes : + if check_entrez_geneid(line[5]): + if line[5] not in dico_geneid_to_gene_name: + dico_geneid_to_gene_name[line[5]]=[line[4]] + else : + if line[4] not in dico_geneid_to_gene_name[line[5]] : + dico_geneid_to_gene_name[line[5]].append(line[4]) + + with requests.Session() as s: + r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt') + r = r.content.decode('utf-8') + humap = csv.reader(r.splitlines(), delimiter='\t') + + dico_network = {} + for line in humap : + if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]): + + interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name) + + if line[0] not in dico_network: + dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]] + else : + dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]]) + + with requests.Session() as s: + r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt') + r.encoding 
="utf-8" + tab_file = csv.reader(r.content.splitlines(), delimiter='\t') + + dico_nodes = {} + geneid_index=0 + pathway_description_index=3 + species_index=5 + for line in tab_file : + if line[species_index]==species_dict[species]: + #Fill dictionary with pathways + if line[geneid_index] in dico_nodes : + dico_nodes[line[geneid_index]].append(line[pathway_description_index]) + else : + dico_nodes[line[geneid_index]] = [line[pathway_description_index]] + + dico={} + dico['network']=dico_network + dico['nodes']=dico_nodes + dico['gene_name']=dico_geneid_to_gene_name + #writing output output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json" path = os.path.join(target_directory,output_file) @@ -464,7 +541,10 @@ ## Download PPI ref files from biogrid/bioplex/humap try: interactome=args.interactome - species=args.species + if interactome == "biogrid" : + species=args.species + else : + species="Human" except NameError: interactome=None species=None
--- a/data_manager/resource_building.xml Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager/resource_building.xml Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ -<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.01.31.2" tool_type="manage_data"> +<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.02.01" tool_type="manage_data"> <description> to create or update reference files for proteore tools </description> @@ -18,8 +18,10 @@ #else if $database.database == "id_mapping" --id_mapping="$database.species" #else if $database.database == "PPI" - --species="$database.species" - --interactome="$database.interactome" + --interactome="$database.base.interactome" + #if $database.base.interactome == "biogrid" + --species="$database.base.species" + #end if #end if --output "$output" @@ -71,16 +73,22 @@ </param> </when> <when value="PPI"> - <param name="interactome" type="select" multiple="false" label="Please select interactome"> - <option value="biogrid">BioGRID</option> - <option value="bioplex">Bioplex</option> - <option value="humap">Hu.map</option> - </param> - <param name="species" type="select" multiple="false" label="Please select the species"> - <option value="Human">Human (Homo sapiens)</option> - <option value="Mouse">Mouse (Mus musculus)</option> - <option value="Rat">Rat (Rattus norvegicus)</option> - </param> + <conditional name="base"> + <param name="interactome" type="select" multiple="false" label="Please select interactome"> + <option value="biogrid">BioGRID</option> + <option value="bioplex">Human Bioplex 2.0</option> + <option value="humap">Human protein complex Map (Hu.map)</option> + </param> + <when value="biogrid"> + <param name="species" type="select" multiple="false" label="Please select the species"> + <option value="Human">Human (Homo sapiens)</option> + <option value="Mouse">Mouse (Mus musculus)</option> + <option value="Rat">Rat (Rattus norvegicus)</option> + </param> + 
</when> + <when value="bioplex"/> + <when value="humap"/> + </conditional> </when> </conditional> </inputs>
--- a/data_manager_conf.xml Thu Jan 31 08:58:26 2019 -0500 +++ b/data_manager_conf.xml Fri Feb 01 10:21:58 2019 -0500 @@ -75,5 +75,20 @@ </column> </output> </data_table> + <data_table name="proteore_humap_dictionaries"> + <output> + <column name="id" /> + <column name="name" /> + <column name="value" /> + <column name="path" output_ref="output" > + <move type="file"> + <!--source>${path}</source--> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target> + </move> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> + <value_translation type="function">abspath</value_translation> + </column> + </output> + </data_table> </data_manager> </data_managers>
--- a/tool-data/proteore_biogrid_dictionaries.loc.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool-data/proteore_biogrid_dictionaries.loc.sample Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ #id name value path -#biogrid_human_08-01-2019 Human (Homo sapiens) human PPI_dictionaries/human_biogrid_dict.json -#biogrid_mouse_08-01-2019 Mouse (Mus musculus) mouse PPI_dictionaries/mouse_biogrid_dict.json -#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) rat PPI_dictionaries/rat_biogrid_dict.json +#biogrid_human_08-01-2019 Human (Homo sapiens) Human PPI_dictionaries/Human_biogrid.json +#biogrid_mouse_08-01-2019 Mouse (Mus musculus) Mouse PPI_dictionaries/Mouse_biogrid.json +#biogrid_rat_08-01-2019 Rat (Rattus norvegicus) Rat PPI_dictionaries/Rat_biogrid.json
--- a/tool-data/proteore_bioplex_dictionaries.loc.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool-data/proteore_bioplex_dictionaries.loc.sample Fri Feb 01 10:21:58 2019 -0500 @@ -1,4 +1,4 @@ #id name value path -#bioplex_human_08-01-2019 Human (Homo sapiens) human PPI_dictionaries/human_bioplex_dict.json -#bioplex_mouse_08-01-2019 Mouse (Mus musculus) mouse PPI_dictionaries/mouse_bioplex_dict.json -#bioplex_rat_08-01-2019 Rat (Rattus norvegicus) rat PPI_dictionaries/rat_bioplex_dict.json +#bioplex_human_08-01-2019 Human (Homo sapiens) Human PPI_dictionaries/human_bioplex.json +#bioplex_mouse_08-01-2019 Mouse (Mus musculus) Mouse PPI_dictionaries/mouse_bioplex.json +#bioplex_rat_08-01-2019 Rat (Rattus norvegicus) Rat PPI_dictionaries/rat_bioplex.json
--- a/tool_data_table_conf.xml.sample Thu Jan 31 08:58:26 2019 -0500 +++ b/tool_data_table_conf.xml.sample Fri Feb 01 10:21:58 2019 -0500 @@ -20,4 +20,8 @@ <columns>id, name, value, path</columns> <file path="tool-data/proteore_bioplex_dictionaries.loc" /> </table> + <table name="proteore_humap_dictionaries" comment_char="#"> + <columns>id, name, value, path</columns> + <file path="tool-data/proteore_humap_dictionaries.loc" /> + </table> </tables>