# HG changeset patch
# User tduigou
# Date 1745488596 0
# Node ID 034686b5bc159f9e90519d9cc219885248d8d64d
planemo upload for repository https://github.com/brsynth commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
diff -r 000000000000 -r 034686b5bc15 output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output.html Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,291 @@
+
+
+
+
+
+
+ Test Results (powered by Planemo)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 034686b5bc15 output.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output.json Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,129 @@
+{
+ "summary": {
+ "num_errors": 0,
+ "num_failures": 0,
+ "num_skips": 0,
+ "num_tests": 1
+ },
+ "tests": [
+ {
+ "data": {
+ "inputs": {
+ "annotation_column": "annotation",
+ "db_uri": "postgresql://postgres:RK17@localhost:5432/test_fragments_db",
+ "fragment_column": "fragment",
+ "genbank_files": [
+ {
+ "id": "0ed824750a9f5374",
+ "src": "hda"
+ },
+ {
+ "id": "93c5e169024bda92",
+ "src": "hda"
+ },
+ {
+ "id": "7e79446a5abd8b5f",
+ "src": "hda"
+ },
+ {
+ "id": "81603bca0e110055",
+ "src": "hda"
+ },
+ {
+ "id": "f1c08c8102faa4bc",
+ "src": "hda"
+ }
+ ],
+ "sequence_column": "sequence",
+ "table": "sample"
+ },
+ "job": {
+ "command_line": "python '/home/rkhaled/galaxytools/tools/save_to_db/save_to_db.py' --input '/tmp/tmp_mv7gt3c/files/5/2/2/dataset_5228856a-39be-4f27-afa9-d1f613dea185.dat,/tmp/tmp_mv7gt3c/files/f/a/a/dataset_faa1d0aa-2bb0-47c0-bc3e-949253dd5c3c.dat,/tmp/tmp_mv7gt3c/files/9/6/1/dataset_961fe859-a187-4b45-98ac-bc9c8bd29463.dat,/tmp/tmp_mv7gt3c/files/a/5/c/dataset_a5c56d98-0435-435b-ad8a-01906b2ab961.dat,/tmp/tmp_mv7gt3c/files/2/a/7/dataset_2a788c0d-f89d-4961-95c4-423fd51249f0.dat' --sequence_column 'sequence' --annotation_column 'annotation' --db_uri 'postgresql://postgres:RK17__at__localhost:5432/test_fragments_db' --table 'sample' --fragment_column 'fragment' --output '/tmp/tmp_mv7gt3c/job_working_directory/000/6/outputs/dataset_c2d55cf8-9c51-42f9-84da-9aa06323c7ce.dat' --file_name_mapping '/tmp/tmp_mv7gt3c/files/5/2/2/dataset_5228856a-39be-4f27-afa9-d1f613dea185.dat:p7_L7Ae-Weiss.gb,/tmp/tmp_mv7gt3c/files/f/a/a/dataset_faa1d0aa-2bb0-47c0-bc3e-949253dd5c3c.dat:p6_Nt-IgKLsequence.gb,/tmp/tmp_mv7gt3c/files/9/6/1/dataset_961fe859-a187-4b45-98ac-bc9c8bd29463.dat:p6_Kozak-ATG.gb,/tmp/tmp_mv7gt3c/files/a/5/c/dataset_a5c56d98-0435-435b-ad8a-01906b2ab961.dat:p4_Kt-L7Ae-Weiss.gb,/tmp/tmp_mv7gt3c/files/2/a/7/dataset_2a788c0d-f89d-4961-95c4-423fd51249f0.dat:HC_Amp_ccdB.gb'",
+ "command_version": "",
+ "copied_from_job_id": null,
+ "create_time": "2025-04-24T09:49:34.459208",
+ "dependencies": [],
+ "exit_code": 0,
+ "external_id": "84720",
+ "galaxy_version": "24.2",
+ "handler": null,
+ "history_id": "0ed824750a9f5374",
+ "id": "016b212ed10ed695",
+ "inputs": {
+ "genbank_files": {
+ "id": "0ed824750a9f5374",
+ "src": "hda",
+ "uuid": "5228856a-39be-4f27-afa9-d1f613dea185"
+ },
+ "genbank_files1": {
+ "id": "0ed824750a9f5374",
+ "src": "hda",
+ "uuid": "5228856a-39be-4f27-afa9-d1f613dea185"
+ },
+ "genbank_files2": {
+ "id": "93c5e169024bda92",
+ "src": "hda",
+ "uuid": "faa1d0aa-2bb0-47c0-bc3e-949253dd5c3c"
+ },
+ "genbank_files3": {
+ "id": "7e79446a5abd8b5f",
+ "src": "hda",
+ "uuid": "961fe859-a187-4b45-98ac-bc9c8bd29463"
+ },
+ "genbank_files4": {
+ "id": "81603bca0e110055",
+ "src": "hda",
+ "uuid": "a5c56d98-0435-435b-ad8a-01906b2ab961"
+ },
+ "genbank_files5": {
+ "id": "f1c08c8102faa4bc",
+ "src": "hda",
+ "uuid": "2a788c0d-f89d-4961-95c4-423fd51249f0"
+ }
+ },
+ "job_messages": [],
+ "job_metrics": [],
+ "job_runner_name": null,
+ "job_stderr": "",
+ "job_stdout": "",
+ "model_class": "Job",
+ "output_collections": {},
+ "outputs": {
+ "output": {
+ "id": "016b212ed10ed695",
+ "src": "hda",
+ "uuid": "c2d55cf8-9c51-42f9-84da-9aa06323c7ce"
+ }
+ },
+ "params": {
+ "__input_ext": "\"input\"",
+ "annotation_column": "\"annotation\"",
+ "chromInfo": "\"/tmp/tmp_mv7gt3c/galaxy-dev/tool-data/shared/ucsc/chrom/?.len\"",
+ "db_uri": "\"postgresql://postgres:RK17@localhost:5432/test_fragments_db\"",
+ "dbkey": "\"?\"",
+ "fragment_column": "\"fragment\"",
+ "sequence_column": "\"sequence\"",
+ "table": "\"sample\""
+ },
+ "state": "ok",
+ "stderr": "",
+ "stdout": "Container 'test_fragments_db' is already running.\nConnected to database.\nProcessing file: dataset_5228856a-39be-4f27-afa9-d1f613dea185.dat\nProcessing file: dataset_faa1d0aa-2bb0-47c0-bc3e-949253dd5c3c.dat\nProcessing file: dataset_961fe859-a187-4b45-98ac-bc9c8bd29463.dat\nProcessing file: dataset_a5c56d98-0435-435b-ad8a-01906b2ab961.dat\nProcessing file: dataset_2a788c0d-f89d-4961-95c4-423fd51249f0.dat\nInserted 5 fragments.\nFragment names written to '/tmp/tmp_mv7gt3c/job_working_directory/000/6/outputs/dataset_c2d55cf8-9c51-42f9-84da-9aa06323c7ce.dat'.\n",
+ "tool_id": "save_to_db",
+ "tool_stderr": "",
+ "tool_stdout": "Container 'test_fragments_db' is already running.\nConnected to database.\nProcessing file: dataset_5228856a-39be-4f27-afa9-d1f613dea185.dat\nProcessing file: dataset_faa1d0aa-2bb0-47c0-bc3e-949253dd5c3c.dat\nProcessing file: dataset_961fe859-a187-4b45-98ac-bc9c8bd29463.dat\nProcessing file: dataset_a5c56d98-0435-435b-ad8a-01906b2ab961.dat\nProcessing file: dataset_2a788c0d-f89d-4961-95c4-423fd51249f0.dat\nInserted 5 fragments.\nFragment names written to '/tmp/tmp_mv7gt3c/job_working_directory/000/6/outputs/dataset_c2d55cf8-9c51-42f9-84da-9aa06323c7ce.dat'.\n",
+ "update_time": "2025-04-24T09:49:41.678113",
+ "user_email": "planemo@galaxyproject.org",
+ "user_id": "0ed824750a9f5374"
+ },
+ "status": "success",
+ "test_index": 0,
+ "time_seconds": 26.82854700088501,
+ "tool_id": "save_to_db",
+ "tool_version": "0.1.0+galaxy0"
+ },
+ "has_data": true,
+ "id": "save_to_db-0"
+ }
+ ],
+ "version": "0.1"
+}
\ No newline at end of file
diff -r 000000000000 -r 034686b5bc15 save_to_db.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/save_to_db.py Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,304 @@
+"""Push GenBank files into a PostgreSQL table, starting a Docker container if needed."""
+import subprocess
+import time
+import argparse
+import socket
+import os
+import re
+import pandas as pd
+from sqlalchemy import create_engine, inspect
+from sqlalchemy.sql import text
+from sqlalchemy.engine.url import make_url
+from sqlalchemy.exc import OperationalError
+
+# Table and column names are interpolated into SQL text (they cannot be bound
+# parameters), so only plain unquoted identifiers are accepted.
+_IDENTIFIER_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_$]*")
+# Header line of the sequence section in a GenBank record.
+_ORIGIN_RE = re.compile(r"^ORIGIN.*$", flags=re.MULTILINE)
+
+
+def fix_db_uri(uri):
+    """Return *uri* with each ``__at__`` placeholder replaced by ``@``.
+
+    A URI that does not contain the placeholder is returned unchanged.
+    """
+    return uri.replace("__at__", "@")
+
+
+def is_port_in_use(port):
+    """Return True if a TCP connection to localhost:*port* succeeds."""
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        return s.connect_ex(('localhost', port)) == 0
+
+
+def extract_db_name(uri):
+    """Return the database name component of a SQLAlchemy URI."""
+    return make_url(uri).database
+
+
+def _container_state(container_name):
+    """Return "running", "stopped", or None when no such container is found.
+
+    The command is an argument list (no shell), so the name cannot alter it,
+    and ``docker container inspect`` looks the container up by exact name
+    rather than by the partial match of ``docker ps -f name=``.
+    """
+    probe = subprocess.run(
+        ["docker", "container", "inspect", "-f", "{{.State.Running}}", container_name],
+        capture_output=True, text=True,
+    )
+    if probe.returncode != 0:
+        return None
+    return "running" if probe.stdout.strip() == "true" else "stopped"
+
+
+def start_postgres_container(db_name, password=None):
+    """Ensure a PostgreSQL Docker container named *db_name* is running.
+
+    A running container is left alone and a stopped one is started.
+    Otherwise a new ``postgres`` container is created with *db_name* as both
+    its container name and its initial database.
+
+    Args:
+        db_name: Database name; also used as the container name.
+        password: Password for a newly created container. Falls back to the
+            POSTGRES_PASSWORD environment variable, then to the built-in
+            default.
+
+    Raises:
+        ValueError: If *db_name* is empty.
+    """
+    container_name = db_name
+    if not container_name:
+        raise ValueError("A database name is required to name the PostgreSQL container.")
+
+    state = _container_state(container_name)
+    if state == "running":
+        print(f"Container '{container_name}' is already running.")
+        return
+
+    if state == "stopped":
+        print(f"Starting existing container '{container_name}'...")
+        try:
+            subprocess.run(["docker", "start", container_name], check=True)
+        except subprocess.CalledProcessError as e:
+            # Report like a failed "docker run" instead of claiming success.
+            print(f"Failed to start Docker container: {e}")
+            return
+        print(f"PostgreSQL Docker container '{container_name}' activated.")
+        return
+
+    # No such container: create one, publishing on 5433 when 5432 is taken.
+    port = 5432 if not is_port_in_use(5432) else 5433
+    # NOTE(review): the built-in fallback password is kept only for backward
+    # compatibility; prefer passing one or setting POSTGRES_PASSWORD.
+    postgres_password = password or os.getenv("POSTGRES_PASSWORD", "RK17")
+
+    start_command = [
+        "docker", "run", "--name", container_name,
+        # A value-less -e forwards the variable from the client environment,
+        # keeping the password out of the process argument list.
+        "-e", "POSTGRES_PASSWORD",
+        # Without POSTGRES_DB the image does not create a database called
+        # *db_name*, so connecting to it would keep failing.
+        "-e", f"POSTGRES_DB={db_name}",
+        "-p", f"{port}:5432",
+        "-d", "postgres",
+    ]
+    docker_env = dict(os.environ, POSTGRES_PASSWORD=postgres_password)
+
+    try:
+        subprocess.run(start_command, check=True, env=docker_env)
+        print(f"PostgreSQL Docker container '{container_name}' started on port {port}.")
+    except subprocess.CalledProcessError as e:
+        print(f"Failed to start Docker container: {e}")
+
+
+def wait_for_db(uri, timeout=60):
+    """Retry connecting to *uri* every 2 seconds until a connection succeeds.
+
+    Raises:
+        TimeoutError: If no connection succeeds within *timeout* seconds.
+    """
+    engine = create_engine(uri)
+    deadline = time.monotonic() + timeout
+    last_error = None
+    try:
+        while time.monotonic() < deadline:
+            try:
+                with engine.connect():
+                    print("Connected to database.")
+                    return
+            except OperationalError as e:
+                last_error = e
+                print("Database not ready, retrying...")
+                time.sleep(2)
+    finally:
+        # The engine only exists for probing; release its pooled connections.
+        engine.dispose()
+    raise TimeoutError("Database connection failed after timeout.") from last_error
+
+
+def _check_identifier(name, role):
+    """Raise ValueError unless *name* is a plain unquoted SQL identifier."""
+    if not name or not _IDENTIFIER_RE.fullmatch(name):
+        raise ValueError(f"Invalid {role} name: {name!r}.")
+
+
+def _parse_file_name_mapping(file_name_mapping):
+    """Parse ``path:name,path:name`` into ``{basename(path): name sans extension}``."""
+    mapping = {}
+    for entry in file_name_mapping.split(","):
+        entry = entry.strip()
+        if not entry:
+            continue
+        # Split on the last colon so a colon inside the path does not break parsing.
+        path, sep, display_name = entry.rpartition(":")
+        if not (sep and path and display_name):
+            raise ValueError(f"Malformed file_name_mapping entry '{entry}' (expected 'path:name').")
+        mapping[os.path.basename(path)] = os.path.splitext(display_name)[0]
+    return mapping
+
+
+def _split_genbank(gb_file):
+    """Read *gb_file* and return ``(annotation_text, sequence_text)``.
+
+    The split point is the start of the ORIGIN line; both parts are stripped.
+    """
+    with open(gb_file, "r") as f:
+        content = f.read()
+
+    origin_match = _ORIGIN_RE.search(content)
+    if not origin_match:
+        raise ValueError(f"ORIGIN section not found in file: {gb_file}")
+
+    origin_start = origin_match.start()
+    return content[:origin_start].strip(), content[origin_start:].strip()
+
+
+def push_gb_annotations(gb_files, sequence_column, annotation_column, db_uri, table_name, fragment_column_name, output, file_name_mapping):
+    """Insert each GenBank file as one row and write the fragment names to *output*.
+
+    All rows are inserted in one transaction, so an error raised before the
+    commit leaves the table unchanged.
+
+    Args:
+        gb_files: Paths of the GenBank files to store.
+        sequence_column: Column receiving the text from the ORIGIN line onwards.
+        annotation_column: Column receiving the text before the ORIGIN line.
+        db_uri: SQLAlchemy database URI (``__at__`` placeholders allowed).
+        table_name: Target table.
+        fragment_column_name: Column holding the fragment name.
+        output: Path of the text report listing the inserted fragment names.
+        file_name_mapping: Comma-separated ``path:name`` pairs giving the
+            fragment name (file extension dropped) for each input file.
+
+    Raises:
+        ValueError: On an invalid table/column name, a malformed or incomplete
+            mapping, a missing fragment column, or a file without ORIGIN.
+        RuntimeError: If a fragment already exists in the table or is listed
+            more than once in the input.
+    """
+    db_uri = fix_db_uri(db_uri)
+    engine = create_engine(db_uri)
+    inserted_fragments = []
+
+    try:
+        _check_identifier(table_name, "table")
+        _check_identifier(fragment_column_name, "fragment column")
+        _check_identifier(annotation_column, "annotation column")
+        _check_identifier(sequence_column, "sequence column")
+
+        file_name_mapping_dict = _parse_file_name_mapping(file_name_mapping)
+
+        with engine.begin() as connection:
+            inspector = inspect(engine)
+            columns = [col['name'] for col in inspector.get_columns(table_name)]
+
+            if fragment_column_name not in columns:
+                raise ValueError(f"Fragment column '{fragment_column_name}' not found in table '{table_name}'.")
+
+            # Get existing fragments
+            all_rows = connection.execute(text(f"SELECT {fragment_column_name} FROM {table_name}")).fetchall()
+            existing_fragments = {row[0] for row in all_rows}
+
+            insert_rows = []
+
+            for gb_file in gb_files:
+                # Mapping keys are bare file names, not full paths.
+                real_file_name = os.path.basename(gb_file)
+                print(f"Processing file: {real_file_name}")
+
+                fragment_name = file_name_mapping_dict.get(real_file_name)
+                if not fragment_name:
+                    raise ValueError(f"Fragment name not found for file '{real_file_name}' in file_name_mapping.")
+
+                # Abort the whole batch rather than silently skip a duplicate.
+                if fragment_name in existing_fragments:
+                    raise RuntimeError(f"Fatal Error: Fragment '{fragment_name}' already exists in DB. Stopping the process.")
+                if fragment_name in inserted_fragments:
+                    raise RuntimeError(f"Fatal Error: Fragment '{fragment_name}' is listed more than once in the input. Stopping the process.")
+
+                annotation_text, sequence_text = _split_genbank(gb_file)
+                insert_rows.append({
+                    "fragment_value": fragment_name,
+                    "annotation_value": annotation_text,
+                    "sequence_value": sequence_text,
+                })
+                inserted_fragments.append(fragment_name)
+
+            # Fixed bind-parameter names keep the statement independent of the column names.
+            insert_stmt = text(
+                f"INSERT INTO {table_name} ({fragment_column_name}, {annotation_column}, {sequence_column}) "
+                "VALUES (:fragment_value, :annotation_value, :sequence_value)"
+            )
+            for values in insert_rows:
+                connection.execute(insert_stmt, values)
+
+        # Reached only after the transaction above has committed.
+        print(f"Inserted {len(insert_rows)} fragments.")
+
+        # Write inserted fragment names to a text file
+        with open(output, "w") as log_file:
+            for frag in inserted_fragments:
+                log_file.write(f"{frag}\n")
+        print(f"Fragment names written to '{output}'.")
+
+    except Exception as e:
+        print(f"Error during GB file insertion: {e}")
+        raise
+    finally:
+        engine.dispose()
+
+
+def main():
+    """Parse the command line, make sure the database is up, and store the files."""
+    parser = argparse.ArgumentParser(description="Save GenBank files as fragments in a PostgreSQL database table.")
+    parser.add_argument("--input", required=True, help="Comma-separated GenBank input files")
+    parser.add_argument("--sequence_column", required=True, help="DB column storing the sequence part of the GenBank file")
+    parser.add_argument("--annotation_column", required=True, help="DB column storing the annotation (header) part of the GenBank file")
+    parser.add_argument("--db_uri", required=True, help="Database URI connection string")
+    parser.add_argument("--table", required=True, help="Table name in the database")
+    parser.add_argument("--fragment_column", required=True, help="Fragment column name in the database")
+    parser.add_argument("--output", required=True, help="Text report")
+    parser.add_argument("--file_name_mapping", required=True, help="Comma-separated 'path:name' pairs giving the real fragment file names")
+    args = parser.parse_args()
+
+    gb_file_list = [f.strip() for f in args.input.split(",") if f.strip()]
+    db_uri = fix_db_uri(args.db_uri)
+    db_name = extract_db_name(db_uri)
+
+    # Start the Docker container (if not already running), creating it with
+    # the URI's password so the connection below can authenticate.
+    start_postgres_container(db_name, password=make_url(db_uri).password)
+
+    # Wait until the database is ready
+    wait_for_db(db_uri)
+
+    # Store the GenBank files in the database and write the text report
+    push_gb_annotations(gb_file_list, args.sequence_column, args.annotation_column, db_uri, args.table, args.fragment_column, args.output, args.file_name_mapping)
+
+
+if __name__ == "__main__":
+    main()
diff -r 000000000000 -r 034686b5bc15 save_to_db.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/save_to_db.xml Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,73 @@
+
+ Save data to SQL DB
+
+ 0
+ 0.1.0
+
+
+ pandas
+ sqlalchemy
+ psycopg2
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @unpublished{save_to_db,
+ author = {Ramiz Khaled},
+ title = {{save_to_db}},
+ url = {https://github.com/brsynth/},
+ }
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 034686b5bc15 test-data/HC_Amp_ccdB.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/HC_Amp_ccdB.gb Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,109 @@
+LOCUS Exported 2721 bp ds-DNA circular SYN 14-SEP-2017
+DEFINITION synthetic circular DNA.
+ACCESSION .
+VERSION .
+KEYWORDS HC_Amp_ccdB(1-25).gb
+SOURCE synthetic DNA construct
+ ORGANISM synthetic DNA construct
+REFERENCE 1 (bases 1 to 2721)
+ AUTHORS Trial User
+ TITLE Direct Submission
+ JOURNAL Exported Sep 14, 2017 from SnapGene Viewer 4.0.2
+ http://www.snapgene.com
+FEATURES Location/Qualifiers
+ source 1..2721
+ /organism="synthetic DNA construct"
+ /mol_type="other DNA"
+ misc_feature complement(73..78)
+ /label=BsmBI
+ misc_feature 129..462
+ /label=*ccdB promoter*
+ exon 463..768
+ /label=ccdB
+ /note="ccdB"
+ terminator 809..880
+ /note="rrnB T1 terminator
+ transcription terminator T1 from the E. coli rrnB gene"
+ terminator 896..923
+ /note="T7Te terminator
+ phage T7 early transcription terminator"
+ misc_feature 930..942
+ /label=BioBrick suffix
+ /note="universal suffix for all parts"
+ misc_feature 943..948
+ /label=BsmBI
+ terminator 1027..1056
+ /note="T3Te terminator
+ phage T3 early transcription terminator"
+ rep_origin 1078..1665
+ /direction=RIGHT
+ /label=ori
+ /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+ replication"
+ terminator 1677..1704
+ /note="T7Te terminator
+ phage T7 early transcription terminator"
+ CDS complement(1728..2588)
+ /codon_start=1
+ /gene="bla"
+ /product="beta-lactamase"
+ /note="AmpR
+ confers resistance to ampicillin, carbenicillin, and
+ related antibiotics"
+ /translation="MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYI
+ ELDLNSGKILESLRPEERFPMMSTFKVLLCGAVLSRIDAGQEQLGRRIHYSQNDLVEYS
+ PVTEKHLTDGMTVRELCSAAITMSDNTAANLLLATIGGPKELTAFLHNMGDHVTRLDRW
+ EPELNEAIPNDERDTTMPVAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGSLLRSA
+ LPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGAS
+ LIKHW"
+ promoter complement(2589..2691)
+ /note="cat promoter
+ promoter of the E. coli cat gene"
+ORIGIN
+ 1 ctttctgcta tggaggtcag gtatgattta aatggtcagt attgagcgat atctagagaa
+ 61 ttcgtcatag gagagacgca atacgcaaac cgcctctccc cgcgcgttgg ccgattcatt
+ 121 aatgcaggga tccggcttac taaaagccag ataacagtat gcgtatttgc gcgctgattt
+ 181 ttgcggtata agaatatata ctgatatgta tacccgaagt atgtcaaaaa gaggtatgct
+ 241 atgaagcagc gtattacagt gacagttgac agcgacagct atcagttgct caaggcatat
+ 301 atgatgtcaa tatctccggt ctggtaagca caaccatgca gaatgaagcc cgtcgtctgc
+ 361 gtgccgaacg ctggaaagcg gaaaatcagg aagggatggc tgaggtcgcc cggtttattg
+ 421 aaatgaacgg ctcttttgct gacgagaaca ggggctggtg aaatgcagtt taaggtttac
+ 481 acctataaaa gagagagccg ttatcgtctg tttgtggatg tacagagtga tattattgac
+ 541 acgcccgggc gacggatggt gatccccctg gccagtgcac gtctgctgtc agataaagtc
+ 601 ccccgtgaac tttacccggt ggtgcatatc ggggatgaaa gctggcgcat gatgaccacc
+ 661 gatatggcca gtgtgccggt ctccgttatc ggggaagaag tggctgatct cagccaccgc
+ 721 gaaaatgaca tcaaaaacgc cattaacctg atgttctggg gaatataagc tgatagtgct
+ 781 agtgtagatc gctactagag ccaggcatca aataaaacga aaggctcagt cgaaagactg
+ 841 ggcctttcgt tttatctgtt gtttgtcggt gaacgctctc tactagagtc acactggctc
+ 901 accttcgggt gggcctttct gcgtttatat actagtagcg gccgtctcaa cgataacgaa
+ 961 ttcaagcttg atatcattca ggacgagcct cagactccag cgtaactgga ctgcaatcaa
+ 1021 ctcactggct caccttcacg ggtgggcctt tcttcggtag aaaatcaaag gatcttcttg
+ 1081 agatcctttt tttctgcgcg taatctgctg cttgcaaaca aaaaaaccac cgctaccagc
+ 1141 ggtggtttgt ttgccggatc aagagctacc aactcttttt ccgaggtaac tggcttcagc
+ 1201 agagcgcaga taccaaatac tgttcttcta gtgtagccgt agttaggcca ccacttcaag
+ 1261 aactctgtag caccgcctac atacctcgct ctgctaatcc tgttaccagt ggctgctgcc
+ 1321 agtggcgata agtcgtgtct taccgggttg gactcaagac gatagttacc ggataaggcg
+ 1381 cagcggtcgg gctgaacggg gggttcgtgc acacagccca gcttggagcg aacgacctac
+ 1441 accgaactga gatacctaca gcgtgagcta tgagaaagcg ccacgcttcc cgaagggaga
+ 1501 aaggcggaca ggtatccggt aagcggcagg gtcggaacag gagagcgcac gagggagctt
+ 1561 ccagggggaa acgcctggta tctttatagt cctgtcgggt ttcgccacct ctgacttgag
+ 1621 catcgatttt tgtgatgctc gtcagggggg cggagcctat ggaaaaacgc cagcaacgca
+ 1681 gaaaggccca cccgaaggtg agccaggtga ttacatttgg gccctcatta ccaatgctta
+ 1741 atcagtgagg cacctatctc agcgatctgt ctatttcgtt catccatagt tgcctgactc
+ 1801 cccgtcgtgt agataactac gatgcgggag ggcttaccat ctggccccag tgctgcaatg
+ 1861 ataccgcgag aaccacgctc accggctcca gatttatcag caataaacca gccagccggg
+ 1921 agggccgagc gcagaagtga tcctgcaact ttatccgcct ccatccagtc tattaattgt
+ 1981 tgccgggaag ctagagtaag tagttcgcca gttaatagtt tgcgcaacgt tgttgccatt
+ 2041 gctacaggca tcgtggtgtc acgctcgtcg tttggtatgg cttcattcag ctccggttcc
+ 2101 caacgatcaa ggcgagttac atgatccccc atgttgtgca aaaaagcggt tagctccttc
+ 2161 ggtcctccga tcgttgccag aagtaagttg gccgcagtgt tatcactcat ggttatggca
+ 2221 gcactgcata attctcttac tgtcatgcca tccgtgagat gcttttctgt gactggtgag
+ 2281 tactcaacca agtcattctg agaatagtgt atgcggcgac cgagttgctc ttgcccggcg
+ 2341 tcaatacggg ataataccgc gccacatagc agaactttaa aagtgctcat cattggaaaa
+ 2401 cgttcttcgg ggcgtaaact ctcaaggatc ttaccgctgt tgagatccag ttcgatgtaa
+ 2461 cccactcgtg cacccaactg atcttcagca tcttttactt tcaccagcgt ttctgggtga
+ 2521 gcaaaaacag gaaggcaaaa tgccgcaaaa aagggaataa gggcgacacg gaaatgttga
+ 2581 atactcattt tagcttcctt agctcctgaa aatctcgata actcaaaaaa tacgcccggt
+ 2641 agtgatctta tttcattatg gtgaaagttg gaacctctta cgtgccgatc aagtcaaaag
+ 2701 cctccggtcg gaggcttttg a
+//
diff -r 000000000000 -r 034686b5bc15 test-data/p4_Kt-L7Ae-Weiss.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p4_Kt-L7Ae-Weiss.gb Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,72 @@
+LOCUS . 1845 bp DNA UNK 01-JAN-1980
+DEFINITION .
+ACCESSION
+VERSION
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ terminator 392..419
+ /note="T7Te terminator"
+ /note="phage T7 early transcription terminator"
+ rep_origin complement(431..1018)
+ /direction=LEFT
+ /note="ori"
+ /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+ replication"
+ terminator 1040..1069
+ /note="T3Te terminator"
+ /note="phage T3 early transcription terminator"
+ misc_feature 1143..1148
+ /note="BsmBI"
+ source 5..1149
+ /source="Exported"
+ misc_feature 1156..1182
+ /note="Kt - L7Ae"
+ source 1154..1185
+ /source="Exported"
+ misc_feature complement(1191..1196)
+ /note="BsmBI"
+ terminator 1263..1294
+ /note="tonB terminator"
+ /note="bidirectional E. coli tonB-P14 transcription
+ terminator"
+ promoter 1295..1397
+ /note="cat promoter"
+ /note="promoter of the E. coli cat gene"
+ source 1190..1845
+ /source="Exported"
+ORIGIN
+ 1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+ 61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+ 121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+ 181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+ 241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+ 301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+ 361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+ 421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+ 481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+ 541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+ 601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+ 661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+ 721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+ 781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+ 841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+ 901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+ 961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+ 1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+ 1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+ 1141 tacgtctcgg gacaaggatc cgtgatcgga aacgtgagat ccagttccgc gagacgaaga
+ 1201 cgaattctct agatatcgct caatactgac catttaaatc atacctgacc tccatagcag
+ 1261 aaagtcaaaa gcctccgacc ggaggctttt gacttgatcg gcacgtaaga ggttccaact
+ 1321 ttcaccataa tgaaataaga tcactaccgg gcgtattttt tgagttatcg agattttcag
+ 1381 gagctaagga agctaaaatg agccatattc aacgggaaac gtcttgctcg aggccgcgat
+ 1441 taaattccaa catggatgct gatttatatg ggtataaatg ggctcgcgat aatgtcgggc
+ 1501 aatcaggtgc gacaatctat cgattgtatg ggaagcccga tgcgccagag ttgtttctga
+ 1561 aacatggcaa aggtagcgtt gccaatgatg ttacagatga gatggtcagg ctaaactggc
+ 1621 tgacggaatt tatgcctctt ccgaccatca agcattttat ccgtactcct gatgatgcat
+ 1681 ggttactcac cactgcgatc ccagggaaaa cagcattcca ggtattagaa gaatatcctg
+ 1741 attcaggtga aaatattgtt gatgcgctgg cagtgttcct gcgccggttg cattcgattc
+ 1801 ctgtttgtaa ttgtcctttt aacggcgatc gcgtatttcg tctcg
+//
diff -r 000000000000 -r 034686b5bc15 test-data/p6_Kozak-ATG.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p6_Kozak-ATG.gb Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,76 @@
+LOCUS . 1856 bp DNA UNK 01-JAN-1980
+DEFINITION .
+ACCESSION
+VERSION
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ terminator 392..419
+ /note="T7Te terminator"
+ /note="phage T7 early transcription terminator"
+ rep_origin complement(431..1018)
+ /direction=LEFT
+ /note="ori"
+ /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+ replication"
+ terminator 1040..1069
+ /note="T3Te terminator"
+ /note="phage T3 early transcription terminator"
+ misc_feature 1143..1148
+ /note="BsmBI"
+ source 5..1149
+ /source="Exported"
+ misc_feature 1184..1195
+ /note="Kozak"
+ CDS 1192..1194
+ /codon_start=1
+ /note="ATG"
+ /translation="M"
+ source 1154..1196
+ /source="Exported"
+ misc_feature complement(1202..1207)
+ /note="BsmBI"
+ terminator 1274..1305
+ /note="tonB terminator"
+ /note="bidirectional E. coli tonB-P14 transcription
+ terminator"
+ promoter 1306..1408
+ /note="cat promoter"
+ /note="promoter of the E. coli cat gene"
+ source 1201..1856
+ /source="Exported"
+ORIGIN
+ 1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+ 61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+ 121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+ 181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+ 241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+ 301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+ 361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+ 421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+ 481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+ 541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+ 601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+ 661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+ 721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+ 781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+ 841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+ 901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+ 961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+ 1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+ 1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+ 1141 tacgtctcgc cagaaccgtc agatccgcta gagattacgc caaccgccac catgggcagc
+ 1201 cgagacgaag acgaattctc tagatatcgc tcaatactga ccatttaaat catacctgac
+ 1261 ctccatagca gaaagtcaaa agcctccgac cggaggcttt tgacttgatc ggcacgtaag
+ 1321 aggttccaac tttcaccata atgaaataag atcactaccg ggcgtatttt ttgagttatc
+ 1381 gagattttca ggagctaagg aagctaaaat gagccatatt caacgggaaa cgtcttgctc
+ 1441 gaggccgcga ttaaattcca acatggatgc tgatttatat gggtataaat gggctcgcga
+ 1501 taatgtcggg caatcaggtg cgacaatcta tcgattgtat gggaagcccg atgcgccaga
+ 1561 gttgtttctg aaacatggca aaggtagcgt tgccaatgat gttacagatg agatggtcag
+ 1621 gctaaactgg ctgacggaat ttatgcctct tccgaccatc aagcatttta tccgtactcc
+ 1681 tgatgatgca tggttactca ccactgcgat cccagggaaa acagcattcc aggtattaga
+ 1741 agaatatcct gattcaggtg aaaatattgt tgatgcgctg gcagtgttcc tgcgccggtt
+ 1801 gcattcgatt cctgtttgta attgtccttt taacggcgat cgcgtatttc gtctcg
+//
diff -r 000000000000 -r 034686b5bc15 test-data/p6_Nt-IgKLsequence.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p6_Nt-IgKLsequence.gb Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,83 @@
+LOCUS . 1886 bp DNA UNK 01-JAN-1980
+DEFINITION .
+ACCESSION
+VERSION
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ terminator 392..419
+ /note="T7Te terminator"
+ /note="phage T7 early transcription terminator"
+ rep_origin complement(431..1018)
+ /direction=LEFT
+ /note="ori"
+ /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+ replication"
+ terminator 1040..1069
+ /note="T3Te terminator"
+ /note="phage T3 early transcription terminator"
+ misc_feature 1143..1148
+ /note="BsmBI"
+ source 5..1149
+ /source="Exported"
+ misc_feature 1154..1165
+ /note="KozaK"
+ CDS 1162..1164
+ /codon_start=1
+ /note="ATG"
+ /translation="M"
+ CDS 1165..1224
+ /codon_start=1
+ /note="Ig-kappa leader"
+ /product="leader sequence from mouse immunoglobulin kappa
+ light chain"
+ /translation="ETDTLLLWVLLLWVPGSTGD"
+ source 1154..1226
+ /source="Exported"
+ misc_feature complement(1232..1237)
+ /note="BsmBI"
+ terminator 1304..1335
+ /note="tonB terminator"
+ /note="bidirectional E. coli tonB-P14 transcription
+ terminator"
+ promoter 1336..1438
+ /note="cat promoter"
+ /note="promoter of the E. coli cat gene"
+ source 1231..1886
+ /source="Exported"
+ORIGIN
+ 1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+ 61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+ 121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+ 181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+ 241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+ 301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+ 361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+ 421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+ 481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+ 541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+ 601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+ 661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+ 721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+ 781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+ 841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+ 901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+ 961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+ 1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+ 1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+ 1141 tacgtctcgc cagccgccac catggaaaca gacacactgc tgctatgggt actgctgctc
+ 1201 tgggttccag gttccactgg tgacagcagc cgagacgaag acgaattctc tagatatcgc
+ 1261 tcaatactga ccatttaaat catacctgac ctccatagca gaaagtcaaa agcctccgac
+ 1321 cggaggcttt tgacttgatc ggcacgtaag aggttccaac tttcaccata atgaaataag
+ 1381 atcactaccg ggcgtatttt ttgagttatc gagattttca ggagctaagg aagctaaaat
+ 1441 gagccatatt caacgggaaa cgtcttgctc gaggccgcga ttaaattcca acatggatgc
+ 1501 tgatttatat gggtataaat gggctcgcga taatgtcggg caatcaggtg cgacaatcta
+ 1561 tcgattgtat gggaagcccg atgcgccaga gttgtttctg aaacatggca aaggtagcgt
+ 1621 tgccaatgat gttacagatg agatggtcag gctaaactgg ctgacggaat ttatgcctct
+ 1681 tccgaccatc aagcatttta tccgtactcc tgatgatgca tggttactca ccactgcgat
+ 1741 cccagggaaa acagcattcc aggtattaga agaatatcct gattcaggtg aaaatattgt
+ 1801 tgatgcgctg gcagtgttcc tgcgccggtt gcattcgatt cctgtttgta attgtccttt
+ 1861 taacggcgat cgcgtatttc gtctcg
+//
diff -r 000000000000 -r 034686b5bc15 test-data/p7_L7Ae-Weiss.gb
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/p7_L7Ae-Weiss.gb Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,82 @@
+LOCUS . 2169 bp DNA UNK 01-JAN-1980
+DEFINITION .
+ACCESSION
+VERSION
+KEYWORDS .
+SOURCE .
+ ORGANISM .
+ .
+FEATURES Location/Qualifiers
+ terminator 392..419
+ /note="T7Te terminator"
+ /note="phage T7 early transcription terminator"
+ rep_origin complement(431..1018)
+ /direction=LEFT
+ /note="ori"
+ /note="high-copy-number ColE1/pMB1/pBR322/pUC origin of
+ replication"
+ terminator 1040..1069
+ /note="T3Te terminator"
+ /note="phage T3 early transcription terminator"
+ misc_feature 1143..1148
+ /note="BsmBI"
+ source 5..1149
+ /source="Exported"
+ CDS 1154..1507
+ /codon_start=1
+ /note="L7Ae (Weiss)"
+ /translation="YVRFEVPEDMQNEALSLLEKVRESGKVKKGTNETTKAVERGLAKL
+ VYIAEDVDPPEIVAHLPLLCEEKNVPYIYVKSKNDLGRAVGIEVPCASAAIINEGELRK
+ ELGSLVEKIKGLQK"
+ source 1154..1509
+ /source="Exported"
+ misc_feature complement(1515..1520)
+ /note="BsmBI"
+ terminator 1587..1618
+ /note="tonB terminator"
+ /note="bidirectional E. coli tonB-P14 transcription
+ terminator"
+ promoter 1619..1721
+ /note="cat promoter"
+ /note="promoter of the E. coli cat gene"
+ source 1514..2169
+ /source="Exported"
+ORIGIN
+ 1 ctcaggcgca atcacgaatg aataacggtt tggttggtgc gagtgatttt gatgacgagc
+ 61 gtaatggctg gcctgttgaa caagtctgga aagaaatgca taagcttttg ccattctcac
+ 121 cggattcagt cgtcactcat ggtgatttct cacttgataa ccttattttt gacgagggga
+ 181 aattaatagg ttgtattgat gttggacgag tcggaatcgc agaccgatac caggatcttg
+ 241 ccatcctatg gaactgcctc ggtgagtttt ctccttcatt acagaaacgg ctttttcaaa
+ 301 aatatggtat tgataatcct gatatgaata aattgcagtt tcacttgatg ctcgatgagt
+ 361 ttttctaatg agggcccaaa tgtaatcacc tggctcacct tcgggtgggc ctttctgcgt
+ 421 tgctggcgtt tttccatagg ctccgccccc ctgacgagca tcacaaaaat cgatgctcaa
+ 481 gtcagaggtg gcgaaacccg acaggactat aaagatacca ggcgtttccc cctggaagct
+ 541 ccctcgtgcg ctctcctgtt ccgaccctgc cgcttaccgg atacctgtcc gcctttctcc
+ 601 cttcgggaag cgtggcgctt tctcatagct cacgctgtag gtatctcagt tcggtgtagg
+ 661 tcgttcgctc caagctgggc tgtgtgcacg aaccccccgt tcagcccgac cgctgcgcct
+ 721 tatccggtaa ctatcgtctt gagtccaacc cggtaagaca cgacttatcg ccactggcag
+ 781 cagccactgg taacaggatt agcagagcga ggtatgtagg cggtgctaca gagttcttga
+ 841 agtggtggcc taactacggc tacactagaa gaacagtatt tggtatctgc gctctgctga
+ 901 agccagttac ctcggaaaaa gagttggtag ctcttgatcc ggcaaacaaa ccaccgctgg
+ 961 tagcggtggt ttttttgttt gcaagcagca gattacgcgc agaaaaaaag gatctcaaga
+ 1021 agatcctttg attttctacc gaagaaaggc ccacccgtga aggtgagcca gtgagttgat
+ 1081 tgcagtccag ttacgctgga gtctgaggct cgtcctgaat gatatcaagc ttgaattcgt
+ 1141 tacgtctcgc agctacgtga gatttgaggt tcctgaggac atgcagaacg aagctctgag
+ 1201 tctgctggag aaggttaggg agagcggtaa ggtaaagaaa ggtaccaacg aaacgacaaa
+ 1261 ggctgtggag aggggactgg caaagctcgt ttacatcgca gaggatgttg acccgcctga
+ 1321 gatcgttgct catctgcccc tcctctgcga ggagaagaat gtgccgtaca tttacgttaa
+ 1381 aagcaagaac gaccttggaa gggctgtggg cattgaggtg ccatgcgctt cggcagcgat
+ 1441 aatcaacgag ggagagctga gaaaggagct tggaagcctt gtggagaaga ttaaaggcct
+ 1501 tcagaagtca ggccgagacg aagacgaatt ctctagatat cgctcaatac tgaccattta
+ 1561 aatcatacct gacctccata gcagaaagtc aaaagcctcc gaccggaggc ttttgacttg
+ 1621 atcggcacgt aagaggttcc aactttcacc ataatgaaat aagatcacta ccgggcgtat
+ 1681 tttttgagtt atcgagattt tcaggagcta aggaagctaa aatgagccat attcaacggg
+ 1741 aaacgtcttg ctcgaggccg cgattaaatt ccaacatgga tgctgattta tatgggtata
+ 1801 aatgggctcg cgataatgtc gggcaatcag gtgcgacaat ctatcgattg tatgggaagc
+ 1861 ccgatgcgcc agagttgttt ctgaaacatg gcaaaggtag cgttgccaat gatgttacag
+ 1921 atgagatggt caggctaaac tggctgacgg aatttatgcc tcttccgacc atcaagcatt
+ 1981 ttatccgtac tcctgatgat gcatggttac tcaccactgc gatcccaggg aaaacagcat
+ 2041 tccaggtatt agaagaatat cctgattcag gtgaaaatat tgttgatgcg ctggcagtgt
+ 2101 tcctgcgccg gttgcattcg attcctgttt gtaattgtcc ttttaacggc gatcgcgtat
+ 2161 ttcgtctcg
+//
diff -r 000000000000 -r 034686b5bc15 test-data/test_raport.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_raport.txt Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,5 @@
+p7_L7Ae-Weiss
+p6_Nt-IgKLsequence
+p6_Kozak-ATG
+p4_Kt-L7Ae-Weiss
+HC_Amp_ccdB
diff -r 000000000000 -r 034686b5bc15 verification.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/verification.py Thu Apr 24 09:56:36 2025 +0000
@@ -0,0 +1,28 @@
+"""Print every row of the sample table: fragment name, sequence and annotation."""
+import os
+
+from sqlalchemy import create_engine, text
+
+# NOTE(review): the fallback URI embeds credentials; it is kept only so the
+# script behaves as before when the DB_URI environment variable is not set.
+db_uri = os.getenv("DB_URI", "postgresql://postgres:RK17@localhost:5432/test_fragments_db")
+engine = create_engine(db_uri)
+
+try:
+    with engine.connect() as conn:
+        result = conn.execute(text("""
+            SELECT fragment, sequence, annotation
+            FROM sample
+            ORDER BY fragment
+        """))
+
+        print("Full contents of fragments in DB:\n")
+        for row in result:
+            print(f" Fragment: {row.fragment}")
+            print(" Sequence:")
+            print(row.sequence)
+            print("\n Annotation:")
+            print(row.annotation)
+            print("-" * 80)
+finally:
+    engine.dispose()
\ No newline at end of file