changeset 46:80fc0b28e227 draft

planemo upload commit cb633de1f04ef7b7133728909716b6c6594533d1-dirty
author dchristiany
date Fri, 01 Feb 2019 10:21:58 -0500
parents ec7a4d773c45
children 7b486b0fba4e
files data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/proteore_biogrid_dictionaries.loc.sample tool-data/proteore_bioplex_dictionaries.loc.sample tool_data_table_conf.xml.sample
diffstat 6 files changed, 135 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/resource_building.py	Thu Jan 31 08:58:26 2019 -0500
+++ b/data_manager/resource_building.py	Fri Feb 01 10:21:58 2019 -0500
@@ -113,6 +113,13 @@
     else :
         return False
 
+def check_entrez_geneid (id) :
+    """Return True if id starts with digits, or with 1-2 capitals + '_' + (up to 4 capitals and) digits."""
+    # Fixed typo: re.complie does not exist, so every call raised AttributeError.
+    entrez_pattern = re.compile("[0-9]+|[A-Z]{1,2}_[0-9]+|[A-Z]{1,2}_[A-Z]{1,4}[0-9]+")
+    # match() only anchors at the start of the string; trailing characters are not checked.
+    return bool(entrez_pattern.match(id))
+
 #######################################################################################################
 # 3. ID mapping file
 #######################################################################################################
@@ -272,6 +279,21 @@
 # 4. Build protein interaction maps files
 #######################################################################################################
 
+def get_interactant_name(line, dico_geneid_to_gene_name):
+    """Return the gene names of the two interactants of an interaction row.
+
+    line: row whose first two fields are the GeneIDs of interactants A and B.
+    dico_geneid_to_gene_name: dict mapping a GeneID to its gene name(s); it is
+    passed explicitly because it is a local variable of the caller.
+
+    Returns the tuple (interactant_A, interactant_B); "NA" stands in for any
+    GeneID missing from the dictionary.
+    """
+    interactant_A = dico_geneid_to_gene_name.get(line[0], "NA")
+    interactant_B = dico_geneid_to_gene_name.get(line[1], "NA")
+
+    return interactant_A, interactant_B
+
 def PPI_ref_files(data_manager_dict, species, interactome, target_directory):
 
     species_dict={'Human':'Homo sapiens',"Mouse":"Mus musculus","Rat":"Rattus norvegicus"}
@@ -315,15 +337,15 @@
             tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
 
         dico_nodes = {}
-        uniProt_index=0
+        geneid_index=0
         pathway_description_index=3
         species_index=5
         for line in tab_file :
             if line[species_index]==species_dict[species]:
-                if line[uniProt_index] in dico_nodes :
-                    dico_nodes[line[uniProt_index]].append(line[pathway_description_index])
+                if line[geneid_index] in dico_nodes :
+                    dico_nodes[line[geneid_index]].append(line[pathway_description_index])
                 else :
-                    dico_nodes[line[uniProt_index]] = [line[pathway_description_index]]
+                    dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
 
         dico={}
         dico['network']=dico_network
@@ -376,15 +398,15 @@
             tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
 
         dico_nodes_geneid = {}
-        uniProt_index=0
+        geneid_index=0
         pathway_description_index=3
         species_index=5
         for line in tab_file :
             if line[species_index]==species_dict[species]:
-                if line[uniProt_index] in dico_nodes_geneid :
-                    dico_nodes_geneid[line[uniProt_index]].append(line[pathway_description_index])
+                if line[geneid_index] in dico_nodes_geneid :
+                    dico_nodes_geneid[line[geneid_index]].append(line[pathway_description_index])
                 else :
-                    dico_nodes_geneid[line[uniProt_index]] = [line[pathway_description_index]]
+                    dico_nodes_geneid[line[geneid_index]] = [line[pathway_description_index]]
 
         dico={}
         dico_nodes={}
@@ -394,6 +416,61 @@
         dico['nodes']=dico_nodes
         dico['convert']=dico_GeneID_to_UniProt
 
+    ##Humap
+    elif interactome=="humap":
+
+        with requests.Session() as s:
+            r = s.get('http://proteincomplexes.org/static/downloads/nodeTable.txt')
+            r = r.content.decode('utf-8')
+            humap_nodes = csv.reader(r.splitlines(), delimiter=',')
+
+        dico_geneid_to_gene_name={}
+        for line in humap_nodes :
+            if check_entrez_geneid(line[5]):
+                if line[5] not in dico_geneid_to_gene_name:
+                    dico_geneid_to_gene_name[line[5]]=[line[4]]
+                else :
+                    if line[4] not in dico_geneid_to_gene_name[line[5]] :
+                        dico_geneid_to_gene_name[line[5]].append(line[4])
+
+        with requests.Session() as s:
+            r = s.get('http://proteincomplexes.org/static/downloads/pairsWprob.txt')
+            r = r.content.decode('utf-8')
+            humap = csv.reader(r.splitlines(), delimiter='\t')
+
+        dico_network = {}
+        for line in humap :
+            if check_entrez_geneid(line[0]) and check_entrez_geneid(line[1]):
+
+                interactant_A, interactant_B = get_interactant_name(line,dico_geneid_to_gene_name)
+
+                if line[0] not in dico_network:
+                    dico_network[line[0]]=[line[:2]+[interactant_A,interactant_B,line[2]]]
+                else :
+                    dico_network[line[0]].append(line[:2]+[interactant_A,interactant_B,line[2]])
+
+        with requests.Session() as s:
+            r = s.get('https://www.reactome.org/download/current/NCBI2Reactome.txt')
+            r.encoding ="utf-8"
+            tab_file = csv.reader(r.content.splitlines(), delimiter='\t')
+
+        dico_nodes = {}
+        geneid_index=0
+        pathway_description_index=3
+        species_index=5
+        for line in tab_file :
+            if line[species_index]==species_dict[species]:
+                #Fill dictionary with pathways
+                if line[geneid_index] in dico_nodes :
+                    dico_nodes[line[geneid_index]].append(line[pathway_description_index])
+                else :
+                    dico_nodes[line[geneid_index]] = [line[pathway_description_index]]
+
+        dico={}
+        dico['network']=dico_network
+        dico['nodes']=dico_nodes
+        dico['gene_name']=dico_geneid_to_gene_name
+
     #writing output
     output_file = species+'_'+interactome+'_'+ time.strftime("%d-%m-%Y") + ".json"
     path = os.path.join(target_directory,output_file)
@@ -464,7 +541,10 @@
     ## Download PPI ref files from biogrid/bioplex/humap
     try:
         interactome=args.interactome
-        species=args.species
+        if interactome == "biogrid" :
+            species=args.species
+        else :
+            species="Human"
     except NameError:
         interactome=None
         species=None
--- a/data_manager/resource_building.xml	Thu Jan 31 08:58:26 2019 -0500
+++ b/data_manager/resource_building.xml	Fri Feb 01 10:21:58 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.01.31.2" tool_type="manage_data">
+<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.02.01" tool_type="manage_data">
 <description>
 to create or update reference files for proteore tools
 </description>
@@ -18,8 +18,10 @@
     #else if $database.database == "id_mapping"
         --id_mapping="$database.species"
     #else if $database.database == "PPI"
-        --species="$database.species"
-        --interactome="$database.interactome"
+        --interactome="$database.base.interactome"
+        #if $database.base.interactome == "biogrid"
+            --species="$database.base.species"
+        #end if
     #end if
     --output "$output"
     
@@ -71,16 +73,22 @@
             </param>
         </when>
         <when value="PPI">
-            <param name="interactome" type="select" multiple="false" label="Please select interactome">
-                <option value="biogrid">BioGRID</option>
-                <option value="bioplex">Bioplex</option>
-                <option value="humap">Hu.map</option>
-            </param>
-            <param name="species" type="select" multiple="false" label="Please select the species">
-                <option value="Human">Human (Homo sapiens)</option>
-                <option value="Mouse">Mouse (Mus musculus)</option>
-                <option value="Rat">Rat (Rattus norvegicus)</option>
-            </param>
+            <conditional name="base"> <!-- interactome choice; only BioGRID exposes a species selector -->
+                <param name="interactome" type="select" multiple="false" label="Please select interactome">
+                    <option value="biogrid">BioGRID</option>
+                    <option value="bioplex">Human Bioplex 2.0</option>
+                    <option value="humap">Human protein complex Map (Hu.map)</option>
+                </param>
+                <when value="biogrid">
+                    <param name="species" type="select" multiple="false" label="Please select the species">
+                        <option value="Human">Human (Homo sapiens)</option>
+                        <option value="Mouse">Mouse (Mus musculus)</option>
+                        <option value="Rat">Rat (Rattus norvegicus)</option>
+                    </param>
+                </when>
+                <when value="bioplex"/> <!-- no species parameter: resource_building.py sets species to "Human" for non-BioGRID interactomes -->
+                <when value="humap"/> <!-- no species parameter: resource_building.py sets species to "Human" for non-BioGRID interactomes -->
+            </conditional>
         </when>
     </conditional>
 </inputs>
--- a/data_manager_conf.xml	Thu Jan 31 08:58:26 2019 -0500
+++ b/data_manager_conf.xml	Fri Feb 01 10:21:58 2019 -0500
@@ -75,5 +75,20 @@
                 </column>
             </output>
         </data_table>
+        <data_table name="proteore_humap_dictionaries"> <!-- output columns for the humap dictionaries data table -->
+            <output>
+                <column name="id" />
+                <column name="name" />
+                <column name="value" />
+                <column name="path" output_ref="output" >
+                    <move type="file"> <!-- the file is moved under PPI_dictionaries/ in the data manager data path -->
+                        <!--source>${path}</source-->
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">PPI_dictionaries/</target>
+                    </move>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/PPI_dictionaries/${id}.json</value_translation> <!-- path value is rebuilt from the entry id -->
+                    <value_translation type="function">abspath</value_translation> <!-- then converted with abspath -->
+                </column>
+            </output>
+        </data_table>
     </data_manager>
 </data_managers>
--- a/tool-data/proteore_biogrid_dictionaries.loc.sample	Thu Jan 31 08:58:26 2019 -0500
+++ b/tool-data/proteore_biogrid_dictionaries.loc.sample	Fri Feb 01 10:21:58 2019 -0500
@@ -1,4 +1,4 @@
 #id	name	value	path
-#biogrid_human_08-01-2019	Human (Homo sapiens)	human	PPI_dictionaries/human_biogrid_dict.json
-#biogrid_mouse_08-01-2019	Mouse (Mus musculus)	mouse	PPI_dictionaries/mouse_biogrid_dict.json
-#biogrid_rat_08-01-2019	Rat (Rattus norvegicus)	rat	PPI_dictionaries/rat_biogrid_dict.json
+#biogrid_human_08-01-2019	Human (Homo sapiens)	Human	PPI_dictionaries/Human_biogrid.json
+#biogrid_mouse_08-01-2019	Mouse (Mus musculus)	Mouse	PPI_dictionaries/Mouse_biogrid.json
+#biogrid_rat_08-01-2019	Rat (Rattus norvegicus)	Rat	PPI_dictionaries/Rat_biogrid.json
--- a/tool-data/proteore_bioplex_dictionaries.loc.sample	Thu Jan 31 08:58:26 2019 -0500
+++ b/tool-data/proteore_bioplex_dictionaries.loc.sample	Fri Feb 01 10:21:58 2019 -0500
@@ -1,4 +1,4 @@
 #id	name	value	path
-#bioplex_human_08-01-2019	Human (Homo sapiens)	human	PPI_dictionaries/human_bioplex_dict.json
-#bioplex_mouse_08-01-2019	Mouse (Mus musculus)	mouse	PPI_dictionaries/mouse_bioplex_dict.json
-#bioplex_rat_08-01-2019	Rat (Rattus norvegicus)	rat	PPI_dictionaries/rat_bioplex_dict.json
+#bioplex_human_08-01-2019	Human (Homo sapiens)	Human	PPI_dictionaries/human_bioplex.json
+#bioplex_mouse_08-01-2019	Mouse (Mus musculus)	Mouse	PPI_dictionaries/mouse_bioplex.json
+#bioplex_rat_08-01-2019	Rat (Rattus norvegicus)	Rat	PPI_dictionaries/rat_bioplex.json
--- a/tool_data_table_conf.xml.sample	Thu Jan 31 08:58:26 2019 -0500
+++ b/tool_data_table_conf.xml.sample	Fri Feb 01 10:21:58 2019 -0500
@@ -20,4 +20,8 @@
       <columns>id, name, value, path</columns>
       <file path="tool-data/proteore_bioplex_dictionaries.loc" />
     </table>
+    <table name="proteore_humap_dictionaries" comment_char="#">
+      <columns>id, name, value, path</columns>
+      <file path="tool-data/proteore_humap_dictionaries.loc" /> <!-- own .loc file; previously pointed at the bioplex .loc -->
+    </table>
 </tables>