Mercurial > repos > dchristiany > data_manager_proteore
changeset 30:b8271b9a1049 draft
planemo upload commit c89c5deac442c0c2aa52b24f2c5af4b290773fc0-dirty
author | dchristiany |
---|---|
date | Mon, 28 Jan 2019 08:14:27 -0500 |
parents | 871a7347ca24 |
children | e248f8f532a7 |
files | data_manager/resource_building.py data_manager/resource_building.xml |
diffstat | 2 files changed, 21 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/resource_building.py Mon Jan 28 05:16:55 2019 -0500 +++ b/data_manager/resource_building.py Mon Jan 28 08:14:27 2019 -0500 @@ -67,24 +67,29 @@ sample_category_id = tissue[0] name = tissue[1] output_file = name+"_"+time.strftime("%d-%m-%Y") + ".tsv" - query="https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="+sample_category_id+ \ - "&display_options=ShowAbundances&organism_id="+organism_id+"&redundancy_constraint=4&presence_level_constraint=1%2C2"+ \ - "&gene_annotation_level_constraint=leaf&QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY" - download = requests.get(query) - decoded_content = download.content.decode('utf-8') - cr = csv.reader(decoded_content.splitlines(), delimiter='\t') + + query="https://db.systemsbiology.net/sbeams/cgi/PeptideAtlas/GetProteins?&atlas_build_id="+ \ + sample_category_id+"&display_options=ShowAbundances&organism_id="+organism_id+ \ + "&redundancy_constraint=4&presence_level_constraint=1%2C2&gene_annotation_level_constraint=leaf\ + &QUERY_NAME=AT_GetProteins&action=QUERY&output_mode=tsv&apply_action=QUERY" - #build dictionary by only keeping uniprot accession (not isoform) as key and sum of observations as value + print (query) + + with requests.Session() as s: + download = s.get(query) + decoded_content = download.content.decode('utf-8') + cr = csv.reader(decoded_content.splitlines(), delimiter='\t') + #cr = list(cr) + uni_dict = build_dictionary(cr) #columns of data table peptide_atlas date = time.strftime("%d-%m-%Y") - tissue = tissue.split("-")[1] - tissue_id = tissue+"_"+date + tissue_id = name+"_"+date tissue_name = tissue_id.replace("-","/").replace("_"," ") - path = os.path.join(target_directory,output_file) + path = os.path.join(output_file) - with open(path,"wb") as out : + with open(path,"w") as out : w = csv.writer(out,delimiter='\t') w.writerow(["Uniprot_AC","nb_obs"]) w.writerows(uni_dict.items()) @@ -96,11 +101,11 @@ def build_dictionary (csv) 
: uni_dict = {} for line in csv : - if "-" not in line[2] and check_uniprot_access(line[2]) : - if line[2] in uni_dict : - uni_dict[line[2]] += int(line[4]) + if "-" not in line[0] and check_uniprot_access(line[0]) : + if line[0] in uni_dict : + uni_dict[line[0]] += int(line[5]) else : - uni_dict[line[2]] = int(line[4]) + uni_dict[line[0]] = int(line[5]) return uni_dict @@ -112,8 +117,6 @@ else : return False - - ####################################################################################################### # 3. ID mapping file #######################################################################################################
--- a/data_manager/resource_building.xml Mon Jan 28 05:16:55 2019 -0500
+++ b/data_manager/resource_building.xml Mon Jan 28 08:14:27 2019 -0500
@@ -1,4 +1,4 @@
-<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.01.25" tool_type="manage_data">
+<tool id="data_manager_proteore" name="Get source files for proteore tools" version="2019.01.25.1" tool_type="manage_data">
 <description>
 to create or update reference files for proteore tools
 </description>