diff data_manager/resource_building.py @ 10:2f153b41b6fe draft

planemo upload commit e5e768b479ddc6b36270a1b5b0443a4c80d693bc-dirty
author dchristiany
date Tue, 23 Oct 2018 07:43:37 -0400
parents d16a52bf0e5b
children 60cb0a5ae661
line wrap: on
line diff
--- a/data_manager/resource_building.py	Fri Oct 19 05:42:11 2018 -0400
+++ b/data_manager/resource_building.py	Tue Oct 23 07:43:37 2018 -0400
@@ -1,5 +1,5 @@
 """
-The purpose of this script is to create source files from different databases to be used in other tools
+The purpose of this script is to create source files from different databases to be used in other ProteoRE tools
 """
 
 import os, sys, argparse, requests, time, csv, re
@@ -44,13 +44,14 @@
     elif tissue == "HPA_full_atlas":
         tissue_name = "HPA full atlas"
         url = "https://www.proteinatlas.org/download/proteinatlas.tsv.zip"
+    
     output_file = tissue +"_"+ time.strftime("%d-%m-%Y") + ".tsv"
     path = os.path.join(target_directory, output_file)
-    unzip(url, path)
-    print(str(os.path.isfile(path)))
-    tmp=open(path,"r").readlines()
+    unzip(url, path)    #download and save file
     tissue_name = tissue_name + " " + time.strftime("%d/%m/%Y")
-    data_table_entry = dict(value = tissue, name = tissue_name, path = path)
+    tissue_id = tissue_name.replace(" ","_").replace("/","-")
+
+    data_table_entry = dict(id=tissue_id, name = tissue_name, value = tissue, path = path)
     _add_data_table_entry(data_manager_dict, data_table_entry, "protein_atlas")
 
 
@@ -76,10 +77,11 @@
     #build dictionary by only keeping uniprot accession (not isoform) as key and sum of observations as value
     uni_dict = build_dictionary(cr)
 
-    tissue_id = "_".join([atlas_build_id, organism_id, sample_category_id,time.strftime("%d-%m-%Y")])
-    tissue_value = tissue.split("-")[1]
-    tissue = tissue.split("-")[1] + "_" +time.strftime("%d-%m-%Y")
-    tissue_name = " ".join(tissue_value.split("_")) + " " + time.strftime("%d/%m/%Y")
+    #values for the columns of the peptide_atlas data table
+    date = time.strftime("%d-%m-%Y")
+    tissue = tissue.split("-")[1]
+    tissue_id = tissue+"_"+date
+    tissue_name = tissue_id.replace("-","/").replace("_"," ")
     path = os.path.join(target_directory,output_file)
 
     with open(path,"wb") as out :
@@ -87,7 +89,7 @@
         w.writerow(["Uniprot_AC","nb_obs"])
         w.writerows(uni_dict.items())
         
-    data_table_entry = dict(value = path, name = tissue_name, tissue = tissue)
+    data_table_entry = dict(id=tissue_id, name=tissue_name, value = path, tissue = tissue)
     _add_data_table_entry(data_manager_dict, data_table_entry, "peptide_atlas")
 
 #function to count the number of observations by uniprot id
@@ -215,9 +217,10 @@
         w.writerows(tab)
 
     name_dict={"human" : "Homo sapiens", "mouse" : "Mus musculus", "rat" : "Rattus norvegicus"}
-    name = name_dict[species]+" ("+time.strftime("%d-%m-%Y")+")"
+    name = name_dict[species]+" "+time.strftime("%d/%m/%Y")
+    id = species+"_id_mapping_"+ time.strftime("%d-%m-%Y")
 
-    data_table_entry = dict(value = species+"_id_mapping_"+ time.strftime("%d-%m-%Y"), name = name, path = path)
+    data_table_entry = dict(id=id, name = name, value = species, path = path)
     _add_data_table_entry(data_manager_dict, data_table_entry, "id_mapping")
 
 def download_from_uniprot_ftp(file,target_directory) :