data_manager_proteore: data_manager/resource_building.py

comparison data_manager/resource_building.py @ 10:2f153b41b6fe draft

planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty

author	dchristiany
date	Tue, 23 Oct 2018 07:43:37 -0400
parents	d16a52bf0e5b
children	60cb0a5ae661

comparison

Legend: equal | deleted | inserted | replaced

-:6c47b77d89d6
+:2f153b41b6fe
 """
-The purpose of this script is to create source files from different databases to be used in other tools
+The purpose of this script is to create source files from different databases to be used in other proteore tools
 """
 import os, sys, argparse, requests, time, csv, re
 from io import BytesIO
 from zipfile import ZipFile
 tissue_name = "HPA pathology"
 url = "https://www.proteinatlas.org/download/pathology.tsv.zip"
 elif tissue == "HPA_full_atlas":
 tissue_name = "HPA full atlas"
 url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip"
 output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv"
 path = os.path.join(target_directory, output_file)
-unzip(url, path)
+unzip(url, path)    #download and save file
-print(str(os.path.isfile(path)))
-tmp=open(path,"r").readlines()
 tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y")
-data_table_entry = dict(value = tissue, name = tissue_name, path = path)
+tissue_id = tissue_name.replace(" ","_").replace("/","-")
+data_table_entry = dict(id=tissue_id, name = tissue_name, value = tissue, path = path)
 _add_data_table_entry(data_manager_dict, data_table_entry, "protein_atlas")
 #######################################################################################################
 # 2. Peptide Atlas
 cr = csv.reader(decoded_content.splitlines(), delimiter='\t')
 #build dictionary by only keeping uniprot accession (not isoform) as key and sum of observations as value
 uni_dict = build_dictionary(cr)
-tissue_id = "_".join([atlas_build_id, organism_id, sample_category_id,time.strftime("%d-%m-%Y")])
+#columns of data table peptide_atlas
-tissue_value = tissue.split("-")[1]
+date = time.strftime("%d-%m-%Y")
-tissue = tissue.split("-")[1] + "_" +time.strftime("%d-%m-%Y")
+tissue = tissue.split("-")[1]
-tissue_name = " ".join(tissue_value.split("_")) + " " + time.strftime("%d/%m/%Y")
+tissue_id = tissue+"_"+date
+tissue_name = tissue_id.replace("-","/").replace("_"," ")
 path = os.path.join(target_directory,output_file)
 with open(path,"wb") as out :
 w = csv.writer(out,delimiter='\t')
 w.writerow(["Uniprot_AC","nb_obs"])
 w.writerows(uni_dict.items())
-data_table_entry = dict(value = path, name = tissue_name, tissue = tissue)
+data_table_entry = dict(id=tissue_id, name=tissue_name, value = path, tissue = tissue)
 _add_data_table_entry(data_manager_dict, data_table_entry, "peptide_atlas")
 #function to count the number of observations by uniprot id
 def build_dictionary (csv) :
 uni_dict = {}
 with open(path,"w") as out :
 w = csv.writer(out,delimiter='\t')
 w.writerows(tab)
 name_dict={"human" : "Homo sapiens", "mouse" : "Mus musculus", "rat" : "Rattus norvegicus"}
-name = name_dict[species]+" ("+time.strftime("%d-%m-%Y")+")"
+name = name_dict[species]+" "+time.strftime("%d/%m/%Y")
+id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
-data_table_entry = dict(value = species+"_id_mapping_"+ time.strftime("%d-%m-%Y"), name = name, path = path)
+data_table_entry = dict(id=id, name = name, value = species, path = path)
 _add_data_table_entry(data_manager_dict, data_table_entry, "id_mapping")
 def download_from_uniprot_ftp(file,target_directory) :
 ftp_dir = "pub/databases/uniprot/current_release/knowledgebase/idmapping/by_organism/"
 path = os.path.join(target_directory, file)

Mercurial > repos > dchristiany > data_manager_proteore

comparison data_manager/resource_building.py @ 10:2f153b41b6fe draft