Mercurial > repos > tduigou > save_to_db
comparison save_to_db.py @ 4:c7a7520afb4b draft
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
| author | tduigou |
|---|---|
| date | Mon, 02 Jun 2025 09:48:14 +0000 |
| parents | 034686b5bc15 |
| children | f934215bdb78 |
Comparison legend: equal · deleted · inserted · replaced
| 3:eca0d710bbe9 | 4:c7a7520afb4b |
|---|---|
| 3 import argparse | 3 import argparse |
| 4 import socket | 4 import socket |
| 5 import os | 5 import os |
| 6 import re | 6 import re |
| 7 import pandas as pd | 7 import pandas as pd |
| 8 import json | |
| 8 from sqlalchemy import create_engine, inspect | 9 from sqlalchemy import create_engine, inspect |
| 9 from sqlalchemy.sql import text | 10 from sqlalchemy.sql import text |
| 10 from sqlalchemy.engine.url import make_url | 11 from sqlalchemy.engine.url import make_url |
| 11 from sqlalchemy.exc import OperationalError | 12 from sqlalchemy.exc import OperationalError |
| 13 | |
| 14 def resolve_parameters(user_params: dict, json_params: dict, keys: list): | |
| 15 resolved = {} | |
| 16 for key in keys: | |
| 17 # Prefer user parameter if it's provided (not None or empty string) | |
| 18 if key in user_params and user_params[key]: | |
| 19 resolved[key] = user_params[key] | |
| 20 else: | |
| 21 resolved[key] = json_params.get(f"JSON_{key}") | |
| 22 return resolved | |
| 12 | 23 |
| 13 def fix_db_uri(uri): | 24 def fix_db_uri(uri): |
| 14 """Replace __at__ with @ in the URI if needed.""" | 25 """Replace __at__ with @ in the URI if needed.""" |
| 15 return uri.replace("__at__", "@") | 26 return uri.replace("__at__", "@") |
| 16 | 27 |
| 175 parser.add_argument("--db_uri", required=True, help="Database URI connection string") | 186 parser.add_argument("--db_uri", required=True, help="Database URI connection string") |
| 176 parser.add_argument("--table", required=True, help="Table name in the database") | 187 parser.add_argument("--table", required=True, help="Table name in the database") |
| 177 parser.add_argument("--fragment_column", required=True, help="Fragment column name in the database") | 188 parser.add_argument("--fragment_column", required=True, help="Fragment column name in the database") |
| 178 parser.add_argument("--output", required=True, help="Text report") | 189 parser.add_argument("--output", required=True, help="Text report") |
| 179 parser.add_argument("--file_name_mapping", required=True, help="real fragments names") | 190 parser.add_argument("--file_name_mapping", required=True, help="real fragments names") |
| 191 parser.add_argument("--json_conf", required=False, help="JSON config file with DB parameters") | |
| 180 args = parser.parse_args() | 192 args = parser.parse_args() |
| 181 | 193 |
| 182 # Start the Docker container (if not already running) | 194 # Load JSON config if provided |
| 195 json_config = {} | |
| 196 if args.json_conf != 'None' or '': | |
| 197 with open(args.json_conf, "r") as f: | |
| 198 json_config = json.load(f) | |
| 199 | |
| 200 # Prefer user input; fallback to JSON_ values if not provided | |
| 201 user_params = { | |
| 202 "table": args.table, | |
| 203 "sequence_column": args.sequence_column, | |
| 204 "annotation_column": args.annotation_column, | |
| 205 "fragment_column": args.fragment_column, | |
| 206 "db_uri": args.db_uri | |
| 207 } | |
| 208 | |
| 209 keys = ["table", "sequence_column", "annotation_column", "fragment_column", "db_uri"] | |
| 210 resolved = resolve_parameters(user_params, json_config, keys) | |
| 211 | |
| 212 # Unpack resolved parameters | |
| 213 table = resolved["table"] | |
| 214 sequence_column = resolved["sequence_column"] | |
| 215 annotation_column = resolved["annotation_column"] | |
| 216 fragment_column = resolved["fragment_column"] | |
| 217 db_uri = fix_db_uri(resolved["db_uri"]) | |
| 218 | |
| 219 # Prepare gb files | |
| 183 gb_file_list = [f.strip() for f in args.input.split(",") if f.strip()] | 220 gb_file_list = [f.strip() for f in args.input.split(",") if f.strip()] |
| 184 db_uri = fix_db_uri(args.db_uri) | 221 |
| 222 # Start and wait for DB | |
| 185 db_name = extract_db_name(db_uri) | 223 db_name = extract_db_name(db_uri) |
| 186 start_postgres_container(db_name) | 224 start_postgres_container(db_name) |
| 187 | |
| 188 # Wait until the database is ready | |
| 189 wait_for_db(db_uri) | 225 wait_for_db(db_uri) |
| 190 | 226 |
| 191 # Fetch annotations from the database and save as gb | 227 # Push annotations |
| 192 push_gb_annotations(gb_file_list, args.sequence_column, args.annotation_column, db_uri, args.table, args.fragment_column, args.output, args.file_name_mapping) | 228 push_gb_annotations( |
| 229 gb_file_list, | |
| 230 sequence_column, | |
| 231 annotation_column, | |
| 232 db_uri, | |
| 233 table, | |
| 234 fragment_column, | |
| 235 args.output, | |
| 236 args.file_name_mapping | |
| 237 ) | |
| 193 | 238 |
| 194 if __name__ == "__main__": | 239 if __name__ == "__main__": |
| 195 main() | 240 main() |
