Mercurial > repos > dchristiany > data_manager_proteore
changeset 10:2f153b41b6fe draft
planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
| author | dchristiany | 
|---|---|
| date | Tue, 23 Oct 2018 07:43:37 -0400 | 
| parents | 6c47b77d89d6 | 
| children | 4bb219eee10f | 
| files | data_manager/resource_building.py data_manager/resource_building.xml data_manager_conf.xml tool-data/id_mapping.loc.sample tool-data/peptide_atlas.loc.sample tool-data/protein_atlas.loc.sample tool_data_table_conf.xml.sample | 
| diffstat | 7 files changed, 51 insertions(+), 56 deletions(-) [+] | 
line wrap: on
Diff (by file):
--- a/data_manager/resource_building.py Fri Oct 19 05:42:11 2018 -0400 +++ b/data_manager/resource_building.py Tue Oct 23 07:43:37 2018 -0400 @@ -1,5 +1,5 @@ """ -The purpose of this script is to create source files from different databases to be used in other tools +The purpose of this script is to create source files from different databases to be used in other proteore tools """ import os, sys, argparse, requests, time, csv, re @@ -44,13 +44,14 @@ elif tissue == "HPA_full_atlas": tissue_name = "HPA full atlas" url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip" + output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv" path = os.path.join(target_directory, output_file) - unzip(url, path) - print(str(os.path.isfile(path))) - tmp=open(path,"r").readlines() + unzip(url, path) #download and save file tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y") - data_table_entry = dict(value = tissue, name = tissue_name, path = path) + tissue_id = tissue_name.replace(" ","_").replace("/","-") + + data_table_entry = dict(id=tissue_id, name = tissue_name, value = tissue, path = path) _add_data_table_entry(data_manager_dict, data_table_entry, "protein_atlas") @@ -76,10 +77,11 @@ #build dictionary by only keeping uniprot accession (not isoform) as key and sum of observations as value uni_dict = build_dictionary(cr) - tissue_id = "_".join([atlas_build_id, organism_id, sample_category_id,time.strftime("%d-%m-%Y")]) - tissue_value = tissue.split("-")[1] - tissue = tissue.split("-")[1] + "_" +time.strftime("%d-%m-%Y") - tissue_name = " ".join(tissue_value.split("_")) + " " + time.strftime("%d/%m/%Y") + #columns of data table peptide_atlas + date = time.strftime("%d-%m-%Y") + tissue = tissue.split("-")[1] + tissue_id = tissue+"_"+date + tissue_name = tissue_id.replace("-","/").replace("_"," ") path = os.path.join(target_directory,output_file) with open(path,"wb") as out : @@ -87,7 +89,7 @@ w.writerow(["Uniprot_AC","nb_obs"]) w.writerows(uni_dict.items()) - 
data_table_entry = dict(value = path, name = tissue_name, tissue = tissue) + data_table_entry = dict(id=tissue_id, name=tissue_name, value = path, tissue = tissue) _add_data_table_entry(data_manager_dict, data_table_entry, "peptide_atlas") #function to count the number of observations by uniprot id @@ -215,9 +217,10 @@ w.writerows(tab) name_dict={"human" : "Homo sapiens", "mouse" : "Mus musculus", "rat" : "Rattus norvegicus"} - name = name_dict[species]+" ("+time.strftime("%d-%m-%Y")+")" + name = name_dict[species]+" "+time.strftime("%d/%m/%Y") + id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y") - data_table_entry = dict(value = species+"_id_mapping_"+ time.strftime("%d-%m-%Y"), name = name, path = path) + data_table_entry = dict(id=id, name = name, value = species, path = path) _add_data_table_entry(data_manager_dict, data_table_entry, "id_mapping") def download_from_uniprot_ftp(file,target_directory) :
--- a/data_manager/resource_building.xml Fri Oct 19 05:42:11 2018 -0400 +++ b/data_manager/resource_building.xml Tue Oct 23 07:43:37 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2018.10.19" tool_type="manage_data"> +<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2018.10.23" tool_type="manage_data"> <description> to create or update reference files for proteore tools </description> @@ -48,9 +48,9 @@ </when> <when value="id_mapping"> <param name="species" type="select" multiple="false" label="Please select the species"> - <option value="human">Homo sapiens</option> - <option value="mouse">Mus musculus</option> - <option value="rat">Rattus norvegicus</option> + <option value="human">Human (Homo sapiens)</option> + <option value="mouse">Mouse (Mus musculus)</option> + <option value="rat">Rat (Rattus norvegicus)</option> </param> </when> </conditional>
--- a/data_manager_conf.xml Fri Oct 19 05:42:11 2018 -0400 +++ b/data_manager_conf.xml Tue Oct 23 07:43:37 2018 -0400 @@ -3,41 +3,44 @@ <data_manager tool_file="data_manager/resource_building.xml" id="resource_building"> <data_table name="protein_atlas"> <output> + <column name="id"/> + <column name="name" /> <column name="value" /> - <column name="name" /> <column name="path" output_ref="output" > <move type="file"> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/${path}</target> + <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">protein_atlas/</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${path}</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/protein_atlas/${id}.tsv</value_translation> <value_translation type="function">abspath</value_translation> </column> </output> </data_table> <data_table name="peptide_atlas"> <output> + <column name="id"/> + <column name="name" /> <column name="tissue" /> - <column name="name" /> <column name="value" output_ref="output" > <move type="file"> <!--source>${path}/${value}.tsv</source--> <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">peptide_atlas/</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${tissue}.tsv</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/peptide_atlas/${id}.tsv</value_translation> <value_translation type="function">abspath</value_translation> </column> </output> </data_table> <data_table name="id_mapping"> <output> + <column name="id" /> + <column name="name" /> <column name="value" /> - <column name="name" /> <column name="path" output_ref="output" > <move type="file"> <!--source>${path}</source--> <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">id_mapping/</target> </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${value}.tsv</value_translation> + <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/id_mapping/${id}.tsv</value_translation> 
<value_translation type="function">abspath</value_translation> </column> </output>
--- a/tool-data/id_mapping.loc.sample Fri Oct 19 05:42:11 2018 -0400 +++ b/tool-data/id_mapping.loc.sample Tue Oct 23 07:43:37 2018 -0400 @@ -1,5 +1,5 @@ #This file lists the locations of reference file for id_converter tool -#<name> <value> <path> -#human_id_mapping Human (homo sapiens) tool-data/human_id_mapping_file.tsv -#mouse_id_mapping Mouse (Mus musculus) tool-data/mouse_id_mapping.tsv -#rat_id_mapping Rat (Rattus norvegicus) tool-data/rat_id_mapping.tsv +#<id> <name> <value> <path> +#human_id_mapping_01-01-2018 Human (homo sapiens) human_id_mapping tool-data/human_id_mapping_file.tsv +#mouse_id_mapping_01-01-2018 Mouse (Mus musculus) mouse_id_mapping tool-data/mouse_id_mapping.tsv +#rat_id_mapping_01-01-2018 Rat (Rattus norvegicus) rat_id_mapping tool-data/rat_id_mapping.tsv
--- a/tool-data/peptide_atlas.loc.sample Fri Oct 19 05:42:11 2018 -0400 +++ b/tool-data/peptide_atlas.loc.sample Tue Oct 23 07:43:37 2018 -0400 @@ -1,15 +1,10 @@ -#This file lists the locations name and values of reference files -#for number of MS/MS observations in a tissue +#This file lists the locations name and values of reference files for number of MS/MS observations in a tissue #This is a tab separated file (TAB, not 4 spaces !) -# -#<tissue> <name> <value> - - -#Human_Heart_20-07-2018 Human Heart 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Heart_20-07-2018.tsv -#Human_Liver_20-07-2018 Human Liver 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_20-07-2018.tsv -#Human_Urine_20-07-2018 Human Urine 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Urine_20-07-2018.tsv -#Human_Brain_20-07-2018 Human Brain 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Brain_20-07-2018.tsv -#Human_Kidney_20-07-2018 Human Kidney 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Kidney_20-07-2018.tsv -#Human_Plasma_20-07-2018 Human Plasma 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Plasma_20-07-2018.tsv -#Human_CSF_20-07-2018 Human CSF 20/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_CSF_20-07-2018.tsv -#Human_Liver_23-07-2018 Human Liver 23/07/2018 /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_23-07-2018.tsv +#<id> <name> <tissue> <value> +#Human_Heart_20-07-2018 Human Heart 20/07/2018 Human_Heart /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Heart_20-07-2018.tsv +#Human_Liver_20-07-2018 Human Liver 20/07/2018 Human_Liver /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Liver_20-07-2018.tsv +#Human_Urine_20-07-2018 Human Urine 20/07/2018 Human_Urine /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Urine_20-07-2018.tsv +#Human_Brain_20-07-2018 Human Brain 20/07/2018 Human_Brain 
/projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Brain_20-07-2018.tsv +#Human_Kidney_20-07-2018 Human Kidney 20/07/2018 Human_Kidney /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Kidney_20-07-2018.tsv +#Human_Plasma_20-07-2018 Human Plasma 20/07/2018 Human_Plasma /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_Plasma_20-07-2018.tsv +#Human_CSF_20-07-2018 Human CSF 20/07/2018 Human_CSF /projet/galaxydev/galaxy/tool-data/peptide_atlas/Human_CSF_20-07-2018.tsv
--- a/tool-data/protein_atlas.loc.sample Fri Oct 19 05:42:11 2018 -0400 +++ b/tool-data/protein_atlas.loc.sample Tue Oct 23 07:43:37 2018 -0400 @@ -1,12 +1,6 @@ -#This file lists the locations name and values of reference files -#for Get expression data tool +#This file lists the locations name and values of reference files for Get expression data tool #This is a tab separated file (TAB, not 4 spaces !) -# -#<name> <value> <path> -# -#protein_atlas.loc could look something like this: -# -#HPA normal tissue 19/07/2018 HPA_normal_tissue /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv -#HPA pathology 19/07/2018 HPA_pathology /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv -#HPA full atlas 19/07/2018 HPA_full_atlas /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19161/dataset_39309_files/HPA_full_atlas_19-07-2018.tsv -# +#<id> <name> <value> <path> +#HPA_normal_tissue_19-07-2018 HPA normal tissue 19/07/2018 HPA_normal_tissue /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19159/dataset_39307_files/HPA_normal_tissue_19-07-2018.tsv +#HPA_pathology_19-07-2018 HPA pathology 19/07/2018 HPA_pathology /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19160/dataset_39308_files/HPA_pathology_19-07-2018.tsv +#HPA_full_atlas_19-07-2018 HPA full atlas 19/07/2018 HPA_full_atlas /projet/galaxydev/galaxy/tool-data/protein_atlas/projet/galaxydev/galaxy/database/jobs_directory/019/19161/dataset_39309_files/HPA_full_atlas_19-07-2018.tsv
--- a/tool_data_table_conf.xml.sample Fri Oct 19 05:42:11 2018 -0400 +++ b/tool_data_table_conf.xml.sample Tue Oct 23 07:43:37 2018 -0400 @@ -1,15 +1,15 @@ <?xml version="1.0"?> <tables> <table name='peptide_atlas' comment_char="#"> - <columns>tissue,name,value</columns> - <file path="tool-data/peptide_atlas.loc"/> + <columns>id, name, tissue, value</columns> + <file path="tool-data/peptide_atlas.loc"/> </table> <table name="protein_atlas" comment_char="#"> - <columns>name, value, path</columns> + <columns>id, name, value, path</columns> <file path="tool-data/protein_atlas.loc" /> </table> <table name="id_mapping" comment_char="#"> - <columns>name, value, path</columns> + <columns>id, name, value, path</columns> <file path="tool-data/id_mapping.loc" /> </table> </tables>
