Mercurial > repos > ieguinoa > ena_upload
changeset 22:391da0820827 draft
Uploaded
author | ieguinoa |
---|---|
date | Tue, 10 Nov 2020 21:31:27 +0000 |
parents | d7a88a8a8f5f |
children | fb51bbda07d7 |
files | README.md ena_upload.xml process_xlsx.py |
diffstat | 3 files changed, 63 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/README.md Fri Nov 06 20:14:09 2020 +0000 +++ b/README.md Tue Nov 10 21:31:27 2020 +0000 @@ -2,3 +2,22 @@ Galaxy wrapper for ena-cli-upload This tool is shipped in a ready to use Galaxy container found [here](https://github.com/ELIXIR-Belgium/ena-upload-container). + +## Set up user credentials on Galaxy + +To enable users to set their credentials for this tool, +make sure the file `config/user_preferences_extra_conf.yml` has the following section: + +``` + ena_account: + description: Your ENA Brokering account details + inputs: + - name: webin_id + label: webin_id + type: text + required: False + - name: password + label: Password + type: password + required: False +```
--- a/ena_upload.xml Fri Nov 06 20:14:09 2020 +0000 +++ b/ena_upload.xml Tue Nov 10 21:31:27 2020 +0000 @@ -9,8 +9,22 @@ </requirements> <command detect_errors="exit_code"><![CDATA[ cwd=\$(pwd); -#set webin_id = os.environ.get('WEBIN_ID', None) -#set webin_secret = os.environ.get('WEBIN_SECRET', None) +#set $use_secret = "False" +#set $webin_id = $__user__.extra_preferences.get('ena_account|webin_id', "") +#set $password = $__user__.extra_preferences.get('ena_account|password', "") +#if $webin_id == "": + ## No user defined credentials, try with global webin_id + secret file path + #set webin_id = os.environ.get('WEBIN_ID', "") + #set webin_secret = os.environ.get('WEBIN_SECRET', None) + #set use_secret = "True" + #if webin_id == "": + echo "No ENA credentials defined. Set your credentials via: User -> Preferences -> Manage Information"; + exit 1; + #end if +#end if + +echo "Submissions will be made with user: $webin_id. You can set your credentials via: User -> Preferences -> Manage Information"; + #set working_dir = os.getcwd() #set $dry_run_option = "False" #set viral_submission = "False" @@ -77,7 +91,11 @@ --action 'add' --center '$action_options.center' --webin_id '$webin_id' - --secret '$webin_secret' + #if $use_secret == "False": + --password $password + #else: + --secret $webin_secret + #end if --data #for $dataset in $files_to_upload: $dataset @@ -155,9 +173,9 @@ <param name="submit_dev" type="boolean" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted " /> <conditional name="input_format_conditional"> <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?"> - <option value="user_generated_tables" selected="False">User generated tables of studies/experiments/runs/samples</option> - <option value="excel_tables" selected="False">User generated metadata tables based on Excel templates</option> - <option value="build_tables" selected="True">Interactive generation of studies structure from dataset</option> + <option value="excel_tables" selected="True">User generated metadata tables based on Excel templates</option> + <option value="build_tables" selected="False">Interactive generation of the study structure (recomended for small studies)</option> + <option value="user_generated_tables" selected="False">User generated tabular files of studies - samples - experiments - runs </option> </param> <when value="excel_tables"> <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />
--- a/process_xlsx.py Fri Nov 06 20:14:09 2020 +0000 +++ b/process_xlsx.py Tue Nov 10 21:31:27 2020 +0000 @@ -2,26 +2,40 @@ import json import os import pathlib +import sys import xlrd + from datetime import datetime def extract_data(xl_sheet, expected_columns): - # Check that the columns in the sheet correspond to what I expect + # Check that the columns I expect are present in the sheet (any order and mixed with others) # Just a verification that the user filled the correct template - row_id = 0 + sheet_columns = {} + for sh_col in range(xl_sheet.ncols): + if xl_sheet.cell(0, sh_col).value in expected_columns: + if xl_sheet.cell(0, sh_col).value in sheet_columns.keys(): + sys.exit("Duplicated columns") + else: + sheet_columns[xl_sheet.cell(0, sh_col).value] = sh_col for col in range(len(expected_columns)): - assert expected_columns[col] == xl_sheet.cell(row_id, col).value + assert expected_columns[col] in sheet_columns.keys(), "Expected column %s not found" %expected_columns[col] + + # fetch rows in a dict data_dict = {} + # the first of the expected columns will be the index + index_col = sheet_columns[expected_columns[0]] + ## skip first 2 rows: column names + comments rows for row_id in range(2,xl_sheet.nrows): row_dict = {} for col in range(1,len(expected_columns)): - row_dict[expected_columns[col]] = xl_sheet.cell(row_id,col).value + # row_dict[expected_columns[col]] = xl_sheet.cell(row_id,col).value + sheet_col_index = sheet_columns[expected_columns[col]] + row_dict[expected_columns[col]] = xl_sheet.cell(row_id,sheet_col_index).value # should I check for duplicate alias/ids? - data_dict[xl_sheet.cell(row_id, 0).value] = row_dict + data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict return data_dict - parser = argparse.ArgumentParser() parser.add_argument('--form',dest='xlsx_path', required=True) parser.add_argument('--out_dir',dest='out_path', required=True)