changeset 10:2f153b41b6fe draft

planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
author dchristiany
date Tue, 23 Oct 2018 07:43:37 -0400
parents 6c47b77d89d6
children 4bb219eee10f
files data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/id_mapping.loc.sample tool-data/peptide_atlas.loc.sample tool-data/protein_atlas.loc.sample tool_data_table_conf.xml.sample
diffstat 7 files changed, 51 insertions(+), 56 deletions(-) [+]
line wrap: on
line diff
--- a/data_manager/resource_building.py	Fri Oct 19 05:42:11 2018 -0400
+++ b/data_manager/resource_building.py	Tue Oct 23 07:43:37 2018 -0400
@@ -1,5 +1,5 @@
 """
-The purpose of this script is to create source files from different databases to be used in other tools
+The purpose of this script is to create source files from different databases to be used in other proteore tools
 """
 
 import os, sys, argparse, requests, time, csv, re
@@ -44,13 +44,14 @@
     elif tissue == "HPA_full_atlas":
         tissue_name = "HPA full atlas"
         url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip"
+    
     output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv"
     path = os.path.join(target_directory, output_file)
-    unzip(url, path)
-    print(str(os.path.isfile(path)))
-    tmp=open(path,"r").readlines()
+    unzip(url, path)    #download and save file
     tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y")
-    data_table_entry = dict(value = tissue, name = tissue_name, path = path)
+    tissue_id = tissue_name.replace(" ","_").replace("/","-")
+
+    data_table_entry = dict(id=tissue_id, name = tissue_name, value = tissue, path = path)
     _add_data_table_entry(data_manager_dict, data_table_entry, "protein_atlas")
 
 
@@ -76,10 +77,11 @@
     #build dictionary by only keeping uniprot accession (not isoform) as key and sum of observations as value
     uni_dict = build_dictionary(cr)
 
-    tissue_id = "_".join([atlas_build_id, organism_id, sample_category_id,time.strftime("%d-%m-%Y")])
-    tissue_value = tissue.split("-")[1]
-    tissue = tissue.split("-")[1] + "_" +time.strftime("%d-%m-%Y")
-    tissue_name = " ".join(tissue_value.split("_")) + " " + time.strftime("%d/%m/%Y")
+    #columns of data table peptide_atlas
+    date = time.strftime("%d-%m-%Y")
+    tissue = tissue.split("-")[1]
+    tissue_id = tissue+"_"+date
+    tissue_name = tissue_id.replace("-","/").replace("_"," ")
     path = os.path.join(target_directory,output_file)
 
     with open(path,"wb") as out :
@@ -87,7 +89,7 @@
         w.writerow(["Uniprot_AC","nb_obs"])
         w.writerows(uni_dict.items())
         
-    data_table_entry = dict(value = path, name = tissue_name, tissue = tissue)
+    data_table_entry = dict(id=tissue_id, name=tissue_name, value = path, tissue = tissue)
     _add_data_table_entry(data_manager_dict, data_table_entry, "peptide_atlas")
 
 #function to count the number of observations by uniprot id
@@ -215,9 +217,10 @@
         w.writerows(tab)
 
     name_dict={"human" : "Homo sapiens", "mouse" : "Mus musculus", "rat" : "Rattus norvegicus"}
-    name = name_dict[species]+" ("+time.strftime("%d-%m-%Y")+")"
+    name = name_dict[species]+" "+time.strftime("%d/%m/%Y")
+    id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
 
-    data_table_entry = dict(value = species+"_id_mapping_"+ time.strftime("%d-%m-%Y"), name = name, path = path)
+    data_table_entry = dict(id=id, name = name, value = species, path = path)
     _add_data_table_entry(data_manager_dict, data_table_entry, "id_mapping")
 
 def download_from_uniprot_ftp(file,target_directory) :
--- a/data_manager/resource_building.xml	Fri Oct 19 05:42:11 2018 -0400
+++ b/data_manager/resource_building.xml	Tue Oct 23 07:43:37 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2018.10.19" tool_type="manage_data">
+<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2018.10.23" tool_type="manage_data">
 <description>
 to create or update reference files for proteore tools
 </description>
@@ -48,9 +48,9 @@
         </when>
         <when value="id_mapping">
             <param name="species" type="select" multiple="false" label="Please select the species">
-                <option value="human">Homo sapiens</option>
-                <option value="mouse">Mus musculus</option>
-                <option value="rat">Rattus norvegicus</option>
+                <option value="human">Human (Homo sapiens)</option>
+                <option value="mouse">Mouse (Mus musculus)</option>
+                <option value="rat">Rat (Rattus norvegicus)</option>
             </param>
         </when>
     </conditional>
--- a/data_manager_conf.xml	Fri Oct 19 05:42:11 2018 -0400
+++ b/data_manager_conf.xml	Tue Oct 23 07:43:37 2018 -0400
@@ -3,41 +3,44 @@
     <data_manager tool_file="data_manager/resource_building.xml" id="resource_building">
         <data_table name="protein_atlas">
             <output>
+                <column name="id"/>
+                <column name="name" />
                 <column name="value" />
-                <column name="name" />
                 <column name="path" output_ref="output" >
                     <move type="file">
-                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/${path}</target>
+                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${path}</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
         </data_table>
         <data_table name="peptide_atlas">
             <output>
+                <column name="id"/>
+                <column name="name" />
                 <column name="tissue" />
-                <column name="name" />
                 <column name="value" output_ref="output" >
                     <move type="file">
                         <!--source>${path}/${value}.tsv</source-->
                         <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">peptide_atlas/</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${tissue}.tsv</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${id}.tsv</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
         </data_table>
         <data_table name="id_mapping">
             <output>
+                <column name="id" />
+                <column name="name" />
                 <column name="value" />
-                <column name="name" />
                 <column name="path" output_ref="output" >
                     <move type="file">
                         <!--source>${path}</source-->
                         <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target>
                     </move>
-                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${value}.tsv</value_translation>
+                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation>
                     <value_translation type="function">abspath</value_translation>
                 </column>
             </output>
--- a/tool-data/id_mapping.loc.sample	Fri Oct 19 05:42:11 2018 -0400
+++ b/tool-data/id_mapping.loc.sample	Tue Oct 23 07:43:37 2018 -0400
@@ -1,5 +1,5 @@
 #This file lists the locations of reference file for id_converter tool
-#<name>	<value>	<path>
-#human_id_mapping	Human (homo sapiens)	tool-data/human_id_mapping_file.tsv
-#mouse_id_mapping	Mouse (Mus musculus)	tool-data/mouse_id_mapping.tsv
-#rat_id_mapping 	Rat (Rattus norvegicus)	tool-data/rat_id_mapping.tsv
+#<id>	<name>	<value>	<path>
+#human_id_mapping_01-01-2018	Human (Homo sapiens)	human	tool-data/human_id_mapping_file.tsv
+#mouse_id_mapping_01-01-2018	Mouse (Mus musculus)	mouse	tool-data/mouse_id_mapping.tsv
+#rat_id_mapping_01-01-2018	Rat (Rattus norvegicus)	rat	tool-data/rat_id_mapping.tsv
--- a/tool-data/peptide_atlas.loc.sample	Fri Oct 19 05:42:11 2018 -0400
+++ b/tool-data/peptide_atlas.loc.sample	Tue Oct 23 07:43:37 2018 -0400
@@ -1,15 +1,10 @@
-#This file lists the locations name and values of reference files 
-#for number of MS/MS observations in a tissue
+#This file lists the names and locations of the reference files giving the number of MS/MS observations in a tissue
 #This is a tab separated file (TAB, not 4 spaces !)
-#
-#<tissue>	<name>	<value>
-
-
-#Human_Heart_20-07-2018	Human Heart 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Heart_20-07-2018.tsv
-#Human_Liver_20-07-2018	Human Liver 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_20-07-2018.tsv
-#Human_Urine_20-07-2018	Human Urine 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Urine_20-07-2018.tsv
-#Human_Brain_20-07-2018	Human Brain 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Brain_20-07-2018.tsv
-#Human_Kidney_20-07-2018	Human Kidney 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Kidney_20-07-2018.tsv
-#Human_Plasma_20-07-2018	Human Plasma 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Plasma_20-07-2018.tsv
-#Human_CSF_20-07-2018	Human CSF 20/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_CSF_20-07-2018.tsv
-#Human_Liver_23-07-2018	Human Liver 23/07/2018	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_23-07-2018.tsv
+#<id>	<name>	<tissue>	<value>
+#Human_Heart_20-07-2018	Human Heart 20/07/2018	Human_Heart	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Heart_20-07-2018.tsv
+#Human_Liver_20-07-2018	Human Liver 20/07/2018	Human_Liver	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_20-07-2018.tsv
+#Human_Urine_20-07-2018	Human Urine 20/07/2018	Human_Urine	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Urine_20-07-2018.tsv
+#Human_Brain_20-07-2018	Human Brain 20/07/2018	Human_Brain	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Brain_20-07-2018.tsv
+#Human_Kidney_20-07-2018	Human Kidney 20/07/2018	Human_Kidney	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Kidney_20-07-2018.tsv
+#Human_Plasma_20-07-2018	Human Plasma 20/07/2018	Human_Plasma	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Plasma_20-07-2018.tsv
+#Human_CSF_20-07-2018	Human CSF 20/07/2018	Human_CSF	/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_CSF_20-07-2018.tsv
--- a/tool-data/protein_atlas.loc.sample	Fri Oct 19 05:42:11 2018 -0400
+++ b/tool-data/protein_atlas.loc.sample	Tue Oct 23 07:43:37 2018 -0400
@@ -1,12 +1,6 @@
-#This file lists the locations name and values of reference files 
-#for Get expression data tool
+#This file lists the names, values and locations of the reference files for the Get expression data tool
 #This is a tab separated file (TAB, not 4 spaces !)
-#
-#<name> <value>	<path>
-#
-#protein_atlas.loc could look something like this:
-#
-#HPA normal tissue 19/07/2018	HPA_normal_tissue	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv
-#HPA pathology 19/07/2018	HPA_pathology	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv
-#HPA full atlas 19/07/2018	HPA_full_atlas	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19161/dataset_39309_files/HPA_full_atlas_19-07-2018.tsv
-#
+#<id>	<name>	<value>	<path>
+#HPA_normal_tissue_19-07-2018	HPA normal tissue 19/07/2018	HPA_normal_tissue	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv
+#HPA_pathology_19-07-2018	HPA pathology 19/07/2018	HPA_pathology	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv
+#HPA_full_atlas_19-07-2018	HPA full atlas 19/07/2018	HPA_full_atlas	/projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19161/dataset_39309_files/HPA_full_atlas_19-07-2018.tsv
--- a/tool_data_table_conf.xml.sample	Fri Oct 19 05:42:11 2018 -0400
+++ b/tool_data_table_conf.xml.sample	Tue Oct 23 07:43:37 2018 -0400
@@ -1,15 +1,15 @@
 <?xml version="1.0"?>
 <tables>
     <table name='peptide_atlas' comment_char="#">
-        <columns>tissue,name,value</columns>
-        <file path="tool-data/peptide_atlas.loc"/>
+      <columns>id, name, tissue, value</columns>
+      <file path="tool-data/peptide_atlas.loc"/>
     </table>
     <table name="protein_atlas" comment_char="#">
-      <columns>name, value, path</columns>
+      <columns>id, name, value, path</columns>
       <file path="tool-data/protein_atlas.loc" />
     </table>
     <table name="id_mapping" comment_char="#">
-      <columns>name, value, path</columns>
+      <columns>id, name, value, path</columns>
       <file path="tool-data/id_mapping.loc" />
     </table>
 </tables>