changeset 22:391da0820827 draft

Uploaded
author ieguinoa
date Tue, 10 Nov 2020 21:31:27 +0000
parents d7a88a8a8f5f
children fb51bbda07d7
files README.md ena_upload.xml process_xlsx.py
diffstat 3 files changed, 63 insertions(+), 12 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Fri Nov 06 20:14:09 2020 +0000
+++ b/README.md	Tue Nov 10 21:31:27 2020 +0000
@@ -2,3 +2,22 @@
 Galaxy wrapper for ena-cli-upload
 
 This tool is shipped in a ready to use Galaxy container found [here](https://github.com/ELIXIR-Belgium/ena-upload-container).
+
+## Set up user credentials on Galaxy
+
+To enable users to set their credentials for this tool,
+make sure the file `config/user_preferences_extra_conf.yml` has the following section:
+
+```
+    ena_account:
+        description: Your ENA Brokering account details
+        inputs:
+            - name: webin_id
+              label: webin_id
+              type: text
+              required: False
+            - name: password
+              label: Password
+              type:  password
+              required: False
+```
--- a/ena_upload.xml	Fri Nov 06 20:14:09 2020 +0000
+++ b/ena_upload.xml	Tue Nov 10 21:31:27 2020 +0000
@@ -9,8 +9,22 @@
   </requirements>
   <command detect_errors="exit_code"><![CDATA[
 cwd=\$(pwd);
-#set webin_id = os.environ.get('WEBIN_ID', None)
-#set webin_secret = os.environ.get('WEBIN_SECRET', None)
+#set $use_secret = "False"
+#set $webin_id = $__user__.extra_preferences.get('ena_account|webin_id', "")
+#set $password = $__user__.extra_preferences.get('ena_account|password', "")
+#if $webin_id == "":
+    ## No user defined credentials, try with global webin_id + secret file path
+    #set webin_id = os.environ.get('WEBIN_ID', "")
+    #set webin_secret = os.environ.get('WEBIN_SECRET', None)
+    #set use_secret = "True"
+    #if webin_id == "":
+        echo "No ENA credentials defined. Set your credentials via: User -> Preferences -> Manage Information";
+        exit 1;
+    #end if
+#end if
+
+echo "Submissions will be made with user: $webin_id. You can set your credentials via: User -> Preferences -> Manage Information";
+
 #set working_dir = os.getcwd()
 #set $dry_run_option = "False"
 #set viral_submission = "False"
@@ -77,7 +91,11 @@
     --action 'add'
     --center '$action_options.center'
     --webin_id '$webin_id'
-    --secret '$webin_secret'
+    #if $use_secret == "False":
+        --password $password
+    #else:
+        --secret $webin_secret
+    #end if
     --data
     #for $dataset in $files_to_upload:
       $dataset
@@ -155,9 +173,9 @@
                 <param name="submit_dev" type="boolean" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted " />
                 <conditional name="input_format_conditional">
                     <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">
-                        <option value="user_generated_tables" selected="False">User generated tables of studies/experiments/runs/samples</option>
-                        <option value="excel_tables" selected="False">User generated metadata tables based on Excel templates</option>
-                        <option value="build_tables" selected="True">Interactive generation of studies structure from dataset</option>
+                        <option value="excel_tables" selected="True">User generated metadata tables based on Excel templates</option>
+                        <option value="build_tables" selected="False">Interactive generation of the study structure (recommended for small studies)</option>
+                        <option value="user_generated_tables" selected="False">User generated tabular files of studies - samples - experiments - runs </option>
                     </param>
                     <when value="excel_tables">
                         <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />
--- a/process_xlsx.py	Fri Nov 06 20:14:09 2020 +0000
+++ b/process_xlsx.py	Tue Nov 10 21:31:27 2020 +0000
@@ -2,26 +2,40 @@
 import json
 import os
 import pathlib
+import sys
 import xlrd
+
 from datetime import datetime
 
 
 def extract_data(xl_sheet, expected_columns):
-    # Check that the columns in the sheet correspond to what I expect
+    # Check that every expected column is present in the sheet (in any order, possibly mixed with other columns)
     # Just a verification that the user filled the correct template
-    row_id = 0
+    sheet_columns = {}
+    for sh_col in range(xl_sheet.ncols):
+        if xl_sheet.cell(0, sh_col).value in expected_columns:
+            if xl_sheet.cell(0, sh_col).value in sheet_columns.keys():
+                sys.exit("Duplicated columns")
+            else:
+                sheet_columns[xl_sheet.cell(0, sh_col).value] = sh_col
     for col in range(len(expected_columns)):
-        assert expected_columns[col] == xl_sheet.cell(row_id, col).value
+        assert expected_columns[col] in sheet_columns.keys(), "Expected column %s not found" %expected_columns[col]
+
+    # fetch rows in a dict
     data_dict = {}
+    # the first of the expected columns will be the index
+    index_col = sheet_columns[expected_columns[0]]
+    ## skip first 2 rows: column names + comments rows
     for row_id in range(2,xl_sheet.nrows):
         row_dict = {}
         for col in range(1,len(expected_columns)):
-            row_dict[expected_columns[col]] = xl_sheet.cell(row_id,col).value
+            # row_dict[expected_columns[col]] = xl_sheet.cell(row_id,col).value
+            sheet_col_index = sheet_columns[expected_columns[col]]
+            row_dict[expected_columns[col]] = xl_sheet.cell(row_id,sheet_col_index).value
         # should I check for duplicate alias/ids?
-        data_dict[xl_sheet.cell(row_id, 0).value] = row_dict
+        data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict
     return data_dict
 
-
 parser = argparse.ArgumentParser()
 parser.add_argument('--form',dest='xlsx_path', required=True)
 parser.add_argument('--out_dir',dest='out_path', required=True)