# HG changeset patch # User tduigou # Date 1747388343 0 # Node ID dc450979fcd4948d8a22063bc4d3606c1db98dac planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae diff -r 000000000000 -r dc450979fcd4 cloning_similation.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/cloning_similation.xml Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,251 @@ + + A cloning simulator for synthetic biology + + 0 + 0.1.0 + + + flametree + biopython + proglog + pandas + dnacauldron + pdf-reports + sequenticon + dna_features_viewer + + + + + + + + + + + + + + + + + +
+ + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + `_). + +**Parameters**: +--------------- +* **assemnby csv**: csv file containes the construct names with their parts. +* **GenBank files**: Upload all the parts sequences for your assembli(es). Don't forget the receptor vector(s). +* **topology**: Can be "circular", "linear". +* **enzyme**: It will be selected automatically, or you can set one from this dictionnary : `dict `_ +* **assembly_class**: + - "GoldenGate_assembly": Type2sRestrictionAssembly (recommended in Domesticated dataset) `ex. dataset `_ + - "gibson_assembly": GibsonAssembly `ex. dataset `_ + - "BASIC_assembly": BASICAssembly `ex. dataset `_ + - "biobrick_assembly": BioBrickStandardAssembly, `ex. dataset `_ + - "lcr_assembly": LigaseCyclingReactionAssembly `ex. dataset `_ + ]]> + + + @unpublished{cloning_simulation + author = {Ramiz Khaled}, + title = {{cloning_simulation}}, + url = {https://github.com/Edinburgh-Genome-Foundry/Examples/blob/master/templates/template1.ipynb}, + } + + +
diff -r 000000000000 -r dc450979fcd4 cloning_simulation.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cloning_simulation.py Fri May 16 09:39:03 2025 +0000
@@ -0,0 +1,215 @@
+"""Simulate DNA assemblies with DnaCauldron and zip the simulation report.
+
+The script loads GenBank records into a DnaCauldron sequence repository,
+gives the records back their original names, simulates the assembly plan
+described in a CSV file and adds the report files to a ZIP archive.
+"""
+import argparse
+import os
+import shutil
+import zipfile
+
+import dnacauldron
+import pandas
+from Bio import SeqIO
+
+# Values accepted for the assembly type; each one is the name of the
+# dnacauldron class used to simulate the assemblies.
+SUPPORTED_ASSEMBLY_TYPES = (
+    "Type2sRestrictionAssembly",
+    "GibsonAssembly",
+    "BASICAssembly",
+    "BioBrickStandardAssembly",
+    "OligoPairAnnealin",
+    "LigaseCyclingReactionAssembly",
+)
+
+
+def _parse_name_mapping(mapping):
+    """Return a {record id: original name} dict built from a name mapping.
+
+    ``mapping`` is a dict or a "path:name,path:name" string. The record id is
+    the base name of the path without its extension; a trailing ".gb" is
+    removed from the original name. None and "" give an empty dict.
+
+    Raises:
+        ValueError: if an entry of the string has no ":" separator.
+    """
+    if not mapping:
+        return {}
+    if isinstance(mapping, str):
+        pairs = []
+        for item in mapping.split(","):
+            # Split on the first ":" only, so a colon inside the original
+            # name does not break the parsing.
+            path, separator, name = item.partition(":")
+            if not separator:
+                raise ValueError(
+                    f"Invalid file name mapping entry: {item!r}"
+                )
+            pairs.append((path, name))
+    else:
+        pairs = list(mapping.items())
+    names = {}
+    for path, name in pairs:
+        record_id = os.path.splitext(os.path.basename(path))[0]
+        # Only a trailing ".gb" is a file extension; str.replace would also
+        # remove ".gb" from the middle of a name.
+        names[record_id] = name[:-3] if name.endswith(".gb") else name
+    return names
+
+
+def cloning_simulation(files_to_assembly, domesticated_list,
+                       csv_file, assembly_type, topology,
+                       file_name_mapping, file_name_mapping_dom,
+                       use_file_names_as_id,
+                       outdir_simulation, output_simulation, enzyme):
+    """Simulate the assemblies of a plan and zip the resulting report.
+
+    Args:
+        files_to_assembly: comma-separated paths of the GenBank part files.
+        domesticated_list: comma-separated paths of additional GenBank
+            files; empty when there are none.
+        csv_file: path of the CSV file describing the assemblies.
+        assembly_type: one of SUPPORTED_ASSEMBLY_TYPES.
+        topology: topology given to the imported records.
+        file_name_mapping: "path:name,..." string (or dict) giving the
+            original name of each part file.
+        file_name_mapping_dom: same mapping for the files of
+            domesticated_list; may be empty.
+        use_file_names_as_id: forwarded to import_records as
+            use_file_names_as_ids.
+        outdir_simulation: directory the report is written to.
+        output_simulation: path of the ZIP archive the report files are
+            added to.
+        enzyme: enzyme set on every assembly; "auto" or None leaves the
+            assemblies untouched.
+
+    Returns:
+        The value of output_simulation.
+
+    Raises:
+        ValueError: if assembly_type is not supported or a name mapping is
+            malformed.
+    """
+    if assembly_type not in SUPPORTED_ASSEMBLY_TYPES:
+        raise ValueError(f"Unsupported assembly type: {assembly_type}")
+    assembly_class = getattr(dnacauldron, assembly_type)
+
+    repository = dnacauldron.SequenceRepository()
+    repository.import_records(files=files_to_assembly.split(','),
+                              use_file_names_as_ids=use_file_names_as_id,
+                              topology=topology)
+    if domesticated_list:
+        repository.import_records(files=domesticated_list.split(','),
+                                  use_file_names_as_ids=use_file_names_as_id,
+                                  topology=topology)
+
+    # Map the ids taken from the dataset file names to the original names.
+    real_names = _parse_name_mapping(file_name_mapping)
+    real_names.update(_parse_name_mapping(file_name_mapping_dom))
+
+    # Rename the records and re-key the collection. Iterate over a copy
+    # because the dict is modified inside the loop.
+    parts = repository.collections["parts"]
+    for key, record in list(parts.items()):
+        new_id = real_names.get(record.id)
+        if new_id is None:
+            continue
+        record.id = new_id
+        record.name = new_id
+        record.description = new_id
+        parts[new_id] = parts.pop(key)
+
+    # Warn when two rows of the CSV hold the same values in every column
+    # after the first one.
+    plan_rows = pandas.read_csv(csv_file, index_col=0, header=None)
+    if plan_rows.duplicated().any():
+        print("Warning: some rows of the assembly csv list the same parts")
+
+    # The plan is read from a copy named "*.csv" (presumably the plan name
+    # comes from this file name). Copy instead of renaming: the input file
+    # stays in place and the copy also works across filesystems.
+    new_csvname = "assambly.csv"
+    if os.path.abspath(csv_file) != os.path.abspath(new_csvname):
+        shutil.copyfile(csv_file, new_csvname)
+
+    assembly_plan = dnacauldron.AssemblyPlan.from_spreadsheet(
+        name="auto_from_filename",
+        path=new_csvname,
+        assembly_class=assembly_class
+    )
+    # enzyme is None when --enzyme is not given; only an explicit enzyme
+    # name overrides the assemblies.
+    if enzyme and enzyme != 'auto':
+        for assembly in assembly_plan.assemblies:
+            assembly.enzyme = enzyme
+
+    simulation = assembly_plan.simulate(sequence_repository=repository)
+    stats = simulation.compute_stats()
+    print(stats)
+
+    report_writer = dnacauldron.AssemblyReportWriter(
+        include_mix_graphs=True,
+        include_assembly_plots=True,
+        show_overhangs_in_graph=True,
+        annotate_parts_homologies=True,
+        include_pdf_report=True,
+    )
+    simulation.write_report(outdir_simulation, assembly_report_writer=report_writer)
+
+    # Add every report file to the ZIP archive, with paths relative to the
+    # report directory.
+    with zipfile.ZipFile(output_simulation, mode='a', compression=zipfile.ZIP_DEFLATED) as zipf:
+        for root, _dirs, files in os.walk(outdir_simulation):
+            for file in files:
+                full_path = os.path.join(root, file)
+                arcname = os.path.relpath(full_path, outdir_simulation)
+                zipf.write(full_path, arcname)
+        print("Files in the zip archive:")
+        for info in zipf.infolist():
+            print(info.filename)
+
+    return output_simulation
+
+
+def parse_command_line_args():
+    """Parse the command-line arguments of the script."""
+    parser = argparse.ArgumentParser(description="Cloning simulation")
+
+    parser.add_argument("--parts_files", required=True,
+                        help="List of GenBank files (Comma-separated)")
+    parser.add_argument("--domesticated_seq", required=True,
+                        help="output of domestication (ganbank list)")
+    parser.add_argument("--assembly_csv", required=True,
+                        help="csv assembly")
+    parser.add_argument('--assembly_plan_name', type=str,
+                        help='type of assembly')
+    parser.add_argument('--topology', type=str,
+                        help='"circular" or "linear"')
+    parser.add_argument('--file_name_mapping', type=str,
+                        help='Mapping of Galaxy filenames to original filenames')
+    parser.add_argument('--file_name_mapping_dom', type=str,
+                        help='Mapping of Galaxy filenames to original domestication filenames')
+    parser.add_argument("--use_file_names_as_id", type=lambda x: x.lower() == 'true', default=True,
+                        help="Use file names as IDs (True/False)")
+    parser.add_argument("--outdir_simulation", required=True,
+                        help="dir output for cloning simulation results")
+    parser.add_argument("--output_simulation", required=True,
+                        help="zip output for cloning simulation results")
+    parser.add_argument('--enzyme', type=str,
+                        help='enzyme to use')
+
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_command_line_args()
+
+    cloning_simulation(
+        args.parts_files, args.domesticated_seq,
+        args.assembly_csv, args.assembly_plan_name, args.topology,
+        args.file_name_mapping, args.file_name_mapping_dom,
+        args.use_file_names_as_id,
+        args.outdir_simulation, args.output_simulation, args.enzyme
+    )
\ No newline at end of file
diff -r 000000000000 -r dc450979fcd4 output.html
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/output.html Fri May 16 09:39:03 2025 +0000
@@ -0,0 +1,291 @@
+ + + + + + + Test Results (powered by Planemo) + + + + + + + + + + +
+
+
+
+ + + + + + + \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 output.json --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/output.json Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,1370 @@ +{ + "summary": { + "num_errors": 0, + "num_failures": 0, + "num_skips": 0, + "num_tests": 6 + }, + "tests": [ + { + "data": { + "inputs": { + "adv|use_file_names_as_ids": true, + "assembly_csv": { + "id": "a2e8f3fd1611e7f5", + "src": "hda" + }, + "assembly_plan_name": "Type2sRestrictionAssembly", + "genbank_files": { + "id": "1d1943aa60deb628", + "src": "hdca" + }, + "topology": "circular" + }, + "job": { + "command_line": "mkdir 'outdir_zip' && python '/home/rkhaled/galaxytools/tools/cloning_simulation/cloning_simulation.py' --parts_files '/tmp/tmps36anksu/files/6/a/4/dataset_6a407f29-14a2-4358-8781-97a5a60006bf.dat,/tmp/tmps36anksu/files/9/c/0/dataset_9c05120b-2a93-4298-a718-2db482dec9e0.dat,/tmp/tmps36anksu/files/e/9/d/dataset_e9dd6c4a-5d1c-48d6-b072-bb67929590f5.dat,/tmp/tmps36anksu/files/c/2/a/dataset_c2a67b9c-bd0d-483b-b229-1f396bc8c275.dat,/tmp/tmps36anksu/files/5/3/d/dataset_53dc6b3e-3d2b-4fb7-b279-8b7204ae8d00.dat,/tmp/tmps36anksu/files/4/0/f/dataset_40fa89b4-5e49-467e-aa6a-d4b8c9b8a8b0.dat,/tmp/tmps36anksu/files/1/4/b/dataset_14b815cb-038e-4836-bb4e-3f049395271c.dat,/tmp/tmps36anksu/files/3/2/7/dataset_32719970-8009-4818-a623-ba980af84d70.dat,/tmp/tmps36anksu/files/6/8/e/dataset_68e30a2d-a44d-40e6-a619-efa1e8cd6e4b.dat,/tmp/tmps36anksu/files/2/3/c/dataset_23c06fae-96cc-4ff8-9e23-111139ce1a92.dat,/tmp/tmps36anksu/files/d/2/9/dataset_d29c16a7-2470-418e-a8f1-1da7c5372dde.dat,/tmp/tmps36anksu/files/2/9/0/dataset_290bfdf4-f595-42f1-a2f0-0f9a9d8e71df.dat' --domesticated_seq '' --assembly_csv '/tmp/tmps36anksu/files/4/2/0/dataset_420c4b07-ea17-4e63-9f48-4cc364e28432.dat' --assembly_plan_name 'Type2sRestrictionAssembly' --file_name_mapping 
'/tmp/tmps36anksu/files/6/a/4/dataset_6a407f29-14a2-4358-8781-97a5a60006bf.dat:part_A.gb,/tmp/tmps36anksu/files/9/c/0/dataset_9c05120b-2a93-4298-a718-2db482dec9e0.dat:part_B.gb,/tmp/tmps36anksu/files/e/9/d/dataset_e9dd6c4a-5d1c-48d6-b072-bb67929590f5.dat:part_C.gb,/tmp/tmps36anksu/files/c/2/a/dataset_c2a67b9c-bd0d-483b-b229-1f396bc8c275.dat:part_D.gb,/tmp/tmps36anksu/files/5/3/d/dataset_53dc6b3e-3d2b-4fb7-b279-8b7204ae8d00.dat:part_E.gb,/tmp/tmps36anksu/files/4/0/f/dataset_40fa89b4-5e49-467e-aa6a-d4b8c9b8a8b0.dat:part_F.gb,/tmp/tmps36anksu/files/1/4/b/dataset_14b815cb-038e-4836-bb4e-3f049395271c.dat:part_G.gb,/tmp/tmps36anksu/files/3/2/7/dataset_32719970-8009-4818-a623-ba980af84d70.dat:part_H.gb,/tmp/tmps36anksu/files/6/8/e/dataset_68e30a2d-a44d-40e6-a619-efa1e8cd6e4b.dat:part_I.gb,/tmp/tmps36anksu/files/2/3/c/dataset_23c06fae-96cc-4ff8-9e23-111139ce1a92.dat:part_J.gb,/tmp/tmps36anksu/files/d/2/9/dataset_d29c16a7-2470-418e-a8f1-1da7c5372dde.dat:part_K.gb,/tmp/tmps36anksu/files/2/9/0/dataset_290bfdf4-f595-42f1-a2f0-0f9a9d8e71df.dat:part_L.gb' --file_name_mapping_dom '' --use_file_names_as_id 'true' --outdir_simulation 'outdir_zip' --output_simulation 'output_zip.zip' --enzyme 'auto' --topology 'circular'&& cp 'output_zip.zip' '/tmp/tmps36anksu/job_working_directory/000/14/outputs/dataset_19827d46-96d8-48c8-b434-17f9e4732aec.dat'", + "command_version": "", + "copied_from_job_id": null, + "create_time": "2025-05-16T08:54:45.629427", + "dependencies": [], + "exit_code": 0, + "external_id": "69472", + "galaxy_version": "24.2", + "handler": null, + "history_id": "1d1943aa60deb628", + "id": "306d449b91ecb0a3", + "inputs": { + "assembly_csv": { + "id": "a2e8f3fd1611e7f5", + "src": "hda", + "uuid": "420c4b07-ea17-4e63-9f48-4cc364e28432" + }, + "genbank_files1": { + "id": "1d1943aa60deb628", + "src": "hda", + "uuid": "6a407f29-14a2-4358-8781-97a5a60006bf" + }, + "genbank_files10": { + "id": "b55875807068e59e", + "src": "hda", + "uuid": "23c06fae-96cc-4ff8-9e23-111139ce1a92" 
+ }, + "genbank_files11": { + "id": "4b64a67f616af776", + "src": "hda", + "uuid": "d29c16a7-2470-418e-a8f1-1da7c5372dde" + }, + "genbank_files12": { + "id": "1fda217ac9617c88", + "src": "hda", + "uuid": "290bfdf4-f595-42f1-a2f0-0f9a9d8e71df" + }, + "genbank_files2": { + "id": "13a3899f03fff34a", + "src": "hda", + "uuid": "9c05120b-2a93-4298-a718-2db482dec9e0" + }, + "genbank_files3": { + "id": "6872e1ad22eb2e5d", + "src": "hda", + "uuid": "e9dd6c4a-5d1c-48d6-b072-bb67929590f5" + }, + "genbank_files4": { + "id": "248d7f69d2402068", + "src": "hda", + "uuid": "c2a67b9c-bd0d-483b-b229-1f396bc8c275" + }, + "genbank_files5": { + "id": "a363c57bce5ddac4", + "src": "hda", + "uuid": "53dc6b3e-3d2b-4fb7-b279-8b7204ae8d00" + }, + "genbank_files6": { + "id": "8ab843ac4a5890da", + "src": "hda", + "uuid": "40fa89b4-5e49-467e-aa6a-d4b8c9b8a8b0" + }, + "genbank_files7": { + "id": "6206f91299006d13", + "src": "hda", + "uuid": "14b815cb-038e-4836-bb4e-3f049395271c" + }, + "genbank_files8": { + "id": "278f145ab0478700", + "src": "hda", + "uuid": "32719970-8009-4818-a623-ba980af84d70" + }, + "genbank_files9": { + "id": "84aedadb70ef205b", + "src": "hda", + "uuid": "68e30a2d-a44d-40e6-a619-efa1e8cd6e4b" + } + }, + "job_messages": [], + "job_metrics": [], + "job_runner_name": null, + "job_stderr": "", + "job_stdout": "", + "model_class": "Job", + "output_collections": {}, + "outputs": { + "output_zip": { + "id": "306d449b91ecb0a3", + "src": "hda", + "uuid": "19827d46-96d8-48c8-b434-17f9e4732aec" + } + }, + "params": { + "__input_ext": "\"input\"", + "adv": "{\"enzyme\": \"auto\", \"use_file_names_as_ids\": true}", + "assembly_plan_name": "\"Type2sRestrictionAssembly\"", + "chromInfo": "\"/tmp/tmps36anksu/galaxy-dev/tool-data/shared/ucsc/chrom/?.len\"", + "dbkey": "\"?\"", + "domesticated_input": null, + "genbank_files": "{\"values\": [{\"id\": 1, \"src\": \"hdca\"}]}", + "topology": "\"circular\"" + }, + "state": "ok", + "stderr": "\rassembly: 0%| | 0/4 [00:00L2RBS-1P_fwd 
+GGACTAGTCCTGTGTGATTTTCTGCTACCCTTATCTCAG +>L2RBS-1P_adapter +ATCACACAGGACTA +>LMA-S_fwd +CTCGGGTAAGAACTCGCACTTCGTGGAAACACTATTA +>LMA-S_adapter +CGAGTTCTTACC +>L1RBS-xS_fwd +CTCGTTGAACACCGTCTCAGGTAAGTATCAGTTGTAA +>L1RBS-xS_adapter +GACGGTGTTCAA +>L1RBS-1P_fwd +GGACTAGTCCTGTGTGATTTACAACTGATACTTACCTGA +>L1RBS-1P_adapter +ATCACACAGGACTA +>L2RBS-xS_fwd +CTCGTGTTACTATTGGCTGAGATAAGGGTAGCAGAAA +>L2RBS-xS_adapter +CCAATAGTAACA +>LMA-P_fwd +GGACAGAGACCCACCAGATAATAGTGTTTCCACGAAGTG +>LMA-P_adapter +TCTGGTGGGTCTCT +>LMB-S_fwd +CTCGGGAGACCTATCGGTAATAACAGTCCAATCTGGTGT +>LMB-S_adapter +CGATAGGTCTCC +>LMB-P_fwd +GGACGATTCCGAAGTTACACCAGATTGGACTGTTATTAC +>LMB-P_adapter +AACTTCGGAATC diff -r 000000000000 -r dc450979fcd4 test-data/BASIC_assembly_dataset/p15A_kan.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BASIC_assembly_dataset/p15A_kan.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,75 @@ +LOCUS F0001_p15A-Kan.xdna 2235 bp DNA linear 22/05/2017 +DEFINITION +ACCESSION +VERSION +SOURCE + ORGANISM +COMMENT Serial Cloner Genbank Format +COMMENT SerialCloner_Type=DNA +COMMENT SerialCloner_Comments= +COMMENT SerialCloner_Ends=0,0,,0, +FEATURES Location/Qualifiers + misc_feature complement(72..887) + /label=p15A + /SerialCloner_Color=&h84A4C0 + /SerialCloner_Show=True + /SerialCloner_Protect=True + /SerialCloner_Arrow=True + misc_feature complement(1223..2043) + /label=kanR + /SerialCloner_Color=&hFF0000 + /SerialCloner_Show=True + /SerialCloner_Protect=True + /SerialCloner_Arrow=True + misc_feature 1..18 + /label=iPrefix + /SerialCloner_Color=&hFF00FF + /SerialCloner_Show=True + /SerialCloner_Protect=True + /SerialCloner_Arrow=True + misc_feature 2218..2235 + /label=iSuffix + /SerialCloner_Color=&hFF00FF + /SerialCloner_Show=True + /SerialCloner_Protect=True + /SerialCloner_Arrow=True +ORIGIN + 1 TCTGGTGGGT CTCTGTCCCG TCAAGTCAGC GTAATGCTCT GCCAGTGTTA CAACCAATTA + 61 ACCAATTCTG ATTAGAAAAA CTCATCGAGC ATCAAATGAA ACTGCAATTT ATTCATATCA + 121 GGATTATCAA TACCATATTT TTGAAAAAGC 
CGTTTCTGTA ATGAAGGAGA AAACTCACCG + 181 AGGCAGTTCC ATAGGATGGC AAGATCCTGG TATCGGTCTG CGATTCCGAC TCGTCCAACA + 241 TCAATACAAC CTATTAATTT CCCCTCGTCA AAAATAAGGT TATCAAGTGA GAAATCACCA + 301 TGAGTGACGA CTGAATCCGG TGAGAATGGC AAAAGCTTAT GCATTTCTTT CCAGACTTGT + 361 TCAACAGGCC AGCCATTACG CTCGTCATCA AAATCACTCG CATCAACCAA ACCGTTATTC + 421 ATTCGTGATT GCGCCTGAGC GAGACGAAAT ACGCGATCGC TGTTAAAAGG ACAATTACAA + 481 ACAGGAATCG AATGCAACCG GCGCAGGAAC ACTGCCAGCG CATCAACAAT ATTTTCACCT + 541 GAATCAGGAT ATTCTTCTAA TACCTGGAAT GCTGTTTTCC CGGGGATCGC AGTGGTGAGT + 601 AACCATGCAT CATCAGGAGT ACGGATAAAA TGCTTGATGG TCGGAAGAGG CATAAATTCC + 661 GTCAGCCAGT TTAGTCTGAC CATCTCATCT GTAACATCAT TGGCAACGCT ACCTTTGCCA + 721 TGTTTCAGAA ACAACTCTGG CGCATCGGGC TTCCCATACA ATCGATAGAT TGTCGCACCT + 781 GATTGCCCGA CATTATCGCG AGCCCATTTA TACCCATATA AATCAGCATC CATGTTGGAA + 841 TTTAATCGCG GCCTCGAGCA AGACGTTTCC CGTTGAATAT GGCTCATAAC ACCCCTTGTA + 901 TTACTGTTTA TGTAAGCAGA CAGTTTTATT GTTCATGATG ATATATTTTT ATCTTGTGCA + 961 ATGTAACATC AGAGATTTTG AGACACAACG TGGCTTTGTT GAATAAATCG AACTTTTGCT + 1021 GAGTTGAAGG ATCAGATCAC GCATCTTCCC GACAACGCAG ACCGTTCCGT GGCAAAGCAA + 1081 AAGTTCAAAA TCACCAACTG GTCCACCTAC AACAAAGCTC TCATCAACCG TGGCTCCCTC + 1141 ACTTTCTGGC TGGATGATGG GGCGATTCAG GCCTGGTATG AGTCAGCAAC ACCTTCTTCA + 1201 CGAGGCAGAC CTCAGCGCTA GCGGAGTGTA TACTGGCTTA CTATGTTGGC ACTGATGAGG + 1261 GTGTCAGTGA AGTGCTTCAT GTGGCAGGAG AAAAAAGGCT GCACCGGTGC GTCAGCAGAA + 1321 TATGTGATAC AGGATATATT CCGCTTCCTC GCTCACTGAC TCGCTACGCT CGGTCGTTCG + 1381 ACTGCGGCGA GCGGAAATGG CTTACGAACG GGGCGGAGAT TTCCTGGAAG ATGCCAGGAA + 1441 GATACTTAAC AGGGAAGTGA GAGGGCCGCG GCAAAGCCGT TTTTCCATAG GCTCCGCCCC + 1501 CCTGACAAGC ATCACGAAAT CTGACGCTCA AATCAGTGGT GGCGAAACCC GACAGGACTA + 1561 TAAAGATACC AGGCGTTTCC CCTGGCGGCT CCCTCGTGCG CTCTCCTGTT CCTGCCTTTC + 1621 GGTTTACCGG TGTCATTCCG CTGTTATGGC CGCGTTTGTC TCATTCCACG CCTGACACTC + 1681 AGTTCCGGGT AGGCAGTTCG CTCCAAGCTG GACTGTATGC ACGAACCCCC CGTTCAGTCC + 1741 GACCGCTGCG CCTTATCCGG TAACTATCGT CTTGAGTCCA ACCCGGAAAG ACATGCAAAA + 1801 
GCACCACTGG CAGCAGCCAC TGGTAATTGA TTTAGAGGAG TTAGTCTTGA AGTCATGCGC + 1861 CGGTTAAGGC TAAACTGAAA GGACAAGTTT TGGTGACTGC GCTCCTCCAA GCCAGTTACC + 1921 TCGGTTCAAA GAGTTGGTAG CTCAGAGAAC CTTCGAAAAA CCGCCCTGCA AGGCGGTTTT + 1981 TTCGTTTTCA GAGCAAGAGA TTACGCGCAG ACCAAAACGA TCTCAAGAAG ATCATCTTAT + 2041 TAAGGGGTCT GACGCTCAGT GGAACGAAAA CTCACGTTAA GGGATTTTGG TCATGAGATT + 2101 ATCAAAAAGG ATCTTCACCT AGATCCTTTT AAATTAAAAA TGAAGTTTTA AATCAATCTA + 2161 AAGTATATAT GAGTAAACTT GGTCTGACAG TTACCAATGC TTAATCAGTG AGGCACCGGC + 2221 TCGGGAGACC TATCG +// \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 test-data/BASIC_assembly_dataset/promoter.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BASIC_assembly_dataset/promoter.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,170 @@ +LOCUS promoter 4361 bp DNA circular UNK 01-JAN-1980 +DEFINITION +ACCESSION +VERSION +SOURCE + ORGANISM +COMMENT Serial Cloner Genbank Format +COMMENT SerialCloner_Type=DNA +COMMENT SerialCloner_Comments= +COMMENT SerialCloner_Ends=0,0,,0, +FEATURES Location/Qualifiers + misc_feature 984..1028 + /label=Linker01 + /SerialCloner_Color=&h8000FF + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1168..1212 + /label=Linker02 + /SerialCloner_Color=&h8000FF + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1327..1361 + /label=PJ23105 + /SerialCloner_Color=&h84A4C0 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + primer_bind 4295..4314 + /label=T7 + /SerialCloner_Color=&h008B8B + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + rep_origin complement(2161..2789) + /label=ColE1 origin + /SerialCloner_Color=&h4682B4 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + CDS complement(2941..3600) + /label=AmpR + /SerialCloner_Color=&h8FBC8F + /SerialCloner_Show=True + /SerialCloner_Protect=False + 
/SerialCloner_Arrow=True + CDS 168..824 + /label=CmR + /SerialCloner_Color=&h8FBC8F + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + Promoter complement(3840..3868) + /label=Amp prom + /SerialCloner_Color=&h4682B4 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1033..1161 + /label=terminator_B0015 + /SerialCloner_Color=&h84A4C0 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 4295..4311 + /label=T7-promoter + /SerialCloner_Color=&hFF00FF + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1..18 + /label=Prefix + /SerialCloner_Color=&hFF0080 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1370..1387 + /label=Suffix + /SerialCloner_Color=&hFF00FF + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 3601..3797 + /label=pB1a promoter + /SerialCloner_Color=&h84A4C0 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True + misc_feature 1327..1361 + /label=ANDERSON 024 + /SerialCloner_Color=&h84A4C0 + /SerialCloner_Show=True + /SerialCloner_Protect=False + /SerialCloner_Arrow=True +ORIGIN + 1 tctggtgggt ctctGTCCTT GATACCGGGA AGCCCTGGGC CAACTTTTGG CGAAAATGAG + 61 ACGTTGATCG GCACGTAAGA GGTTCCAACT TTCACCATAA TGAAATAAGA TCACTACCGG + 121 GCGTATTTTT TGAGTTATCG AGATTTTCAG GAGCTAAGGA AGCTAAAATG GAGAAAAAAA + 181 TCACTGGATA TACCACCGTT GATATATCCC AATGGCATCG TAAAGAACAT TTTGAGGCAT + 241 TTCAGTCAGT TGCTCAATGT ACCTATAACC AGACCGTTCA GCTGGATATT ACGGCCTTTT + 301 TAAAGACCGT AAAGAAAAAT AAGCACAAGT TTTATCCGGC CTTTATTCAC ATTCTTGCCC + 361 GCCTGATGAA TGCTCATCCG GAATTCCGTA TGGCAATGAA AGACGGTGAG CTGGTGATAT + 421 GGGATAGTGT TCACCCTTGT TACACCGTTT TCCATGAGCA AACTGAAACG TTTTCATCGC + 481 TCTGGAGTGA ATACCACGAC GATTTCCGGC AGTTTCTACA CATATATTCG CAAGATGTGG + 541 CGTGTTACGG TGAAAACCTG GCCTATTTCC CTAAAGGGTT 
TATTGAGAAT ATGTTTTTCG + 601 TCTCAGCCAA TCCCTGGGTG AGTTTCACCA GTTTTGATTT AAACGTGGCC AATATGGACA + 661 ACTTCTTCGC CCCCGTTTTC ACCATGGGCA AATATTATAC GCAAGGCGAC AAGGTGCTGA + 721 TGCCGCTGGC GATTCAGGTT CATCATGCCG TCTGTGATGG CTTCCATGTC GGCAGAATGC + 781 TTAATGAATT ACAACAGTAC TGCGATGAGT GGCAGGGCGG GGCGTAATTT TTTTAAGGCA + 841 GTTATTGGTG CCCTTAAACG CCTGGTGCTA CGCCTGAATA AGTGATAATA AGCGGATGAA + 901 TGGCAGAAAT TCGAAAGCAA ATTCGACCCG GTCGTCGGTT CAGGGCAGGG TCGTTAAATA + 961 GCCGCTTATG TCTATTGGGC TCGttactta cgacactccg agacagtcag agggtattta + 1021 ttgaactaGT CCCCAGGCAT CAAATAAAAC GAAAGGCTCA GTCGAAAGAC TGGGCCTTTC + 1081 GTTTTATCTG TTGTTTGTCG GTGAACGCTC TCTACTAGAG TCACACTGGC TCACCTTCGG + 1141 GTGGGCCTTT CTGCGTTTAT AGGCTCGATC GGTGTGAAAA GTCAGTATCC AGTCGTGTAG + 1201 TTCTTATTAC CTGTCCAGAC CTTTACGCCG CTGGAGACTA GTCACAGCTA ACACCACGTC + 1261 GTCCCTATCT GCTGCCCTAG GTCTATGAGT GGTTGCTGGA TAACGAATTC GCGGCCGCTT + 1321 CTAGAGTTTA CGGCTAGCTC AGTCCTAGGT ACTATGCTAG CTACTAGAGG GCTCGggaga + 1381 cctatcgATC TTTCTAGAAG ATCTCCTACA ATATTCTCAG CTGCCATGGA AAATCGATGT + 1441 TCTTCTTTTA TTCTCTCAAG ATTTTCAGGC TGTATATTAA AACTTATATT AAGAACTATG + 1501 CTAACCACCT CATCAGGAAC CGTTGTAGGT GGCGTGGGTT TTCTTGGCAA TCGACTCTCA + 1561 TGAAAACTAC GAGCTAAATA TTCAATATGT TCCTCTTGAC CAACTTTATT CTGCATTTTT + 1621 TTTGAACGAG GTTTAGAGCA AGCTTCAGGA AACTGAGACA GGAATTTTAT TAAAAATTTA + 1681 AATTTTGAAG AAAGTTCAGG GTTAATAGCA TCCATTTTTT GCTTTGCAAG TTCCTCAGCA + 1741 TTCTTAACAA AAGACGTCTC TTTTGACATG TTTAAAGTTT AAACCTCCTG TGTGAAATTA + 1801 TTATCCGCTC ATAATTCCAC ACATTATACG AGCCGGAAGC ATAAAGTGTA AAGCCTGGGG + 1861 TGCCTAATGA GTGAGCTAAC TCACATTAAT TGCGTTGCGC TCACTGCCAA TTGCTTTCCA + 1921 GTCGGGAAAC CTGTCGTGCC AGCTGCATTA ATGAATCGGC CAACGCGCGG GGAGAGGCGG + 1981 TTTGCGTATT GGGCGCTCTT CCGCTTCCTC GCTCACTGAC TCGCTGCGCT CGGTCGTTCG + 2041 GCTGCGGCGA GCGGTATCAG CTCACTCAAA GGCGGTAATA CGGTTATCCA CAGAATCAGG + 2101 GGATAACGCA GGAAAGAACA TGTGAGCAAA AGGCCAGCAA AAGGCCAGGA ACCGTAAAAA + 2161 GGCCGCGTTG CTGGCGTTTT TCCATAGGCT CCGCCCCCCT GACGAGCATC ACAAAAATCG + 2221 
ACGCTCAAGT CAGAGGTGGC GAAACCCGAC AGGACTATAA AGATACCAGG CGTTTCCCCC + 2281 TGGAAGCTCC CTCGTGCGCT CTCCTGTTCC GACCCTGCCG CTTACCGGAT ACCTGTCCGC + 2341 CTTTCTCCCT TCGGGAAGCG TGGCGCTTTC TCATAGCTCA CGCTGTAGGT ATCTCAGTTC + 2401 GGTGTAGGTC GTTCGCTCCA AGCTGGGCTG TGTGCACGAA CCCCCCGTTC AGCCCGACCG + 2461 CTGCGCCTTA TCCGGTAACT ATCGTCTTGA GTCCAACCCG GTAAGACACG ACTTATCGCC + 2521 ACTGGCAGCA GCCACTGGTA ACAGGATTAG CAGAGCGAGG TATGTAGGCG GTGCTACAGA + 2581 GTTCTTGAAG TGGTGGCCTA ACTACGGCTA CACTAGAAGG ACAGTATTTG GTATCTGCGC + 2641 TCTGCTGAAG CCAGTTACCT TCGGAAAAAG AGTTGGTAGC TCTTGATCCG GCAAACAAAC + 2701 CACCGCTGGT AGCGGTGGTT TTTTTGTTTG CAAGCAGCAG ATTACGCGCA GAAAAAAAGG + 2761 ATCTCAAGAA GATCCTTTGA TCTTTTCTAC GGGGTCTGAC GCTCAGTGGA ACGAAAACTC + 2821 ACGTTAAGGG ATTTTGGTCA TGAGATTATC AAAAAGGATC TTCACCTAGA TCCTTTTAAA + 2881 TTAAAAATGA AGTTTTAAAT CAATCTAAAG TATATATGAG TAAACTTGGT CTGACAGTTA + 2941 CCAATGCTTA ATCAGTGAGG CACCTATCTC AGCGATCTGT CTATTTCGTT CATCCATAGT + 3001 TGCCTGACTC CCCGTCGTGT AGATAACTAC GATACGGGAG GGCTTACCAT CTGGCCCCAG + 3061 TGCTGCAATG ATACCGCGAG ACCCACGCTC ACCGGCTCCA GATTTATCAG CAATAAACCA + 3121 GCCAGCCGGA AGGGCCGAGC GCAGAAGTGG TCCTGCAACT TTATCCGCCT CCATCCAGTC + 3181 TATTAATTGT TGCCGGGAAG CTAGAGTAAG TAGTTCGCCA GTTAATAGTT TGCGCAACGT + 3241 TGTTGCCATT GCTACAGGCA TCGTGGTGTC ACGCTCGTCG TTTGGTATGG CTTCATTCAG + 3301 CTCCGGTTCC CAACGATCAA GGCGAGTTAC ATGATCCCCC ATGTTGTGCA AAAAAGCGGT + 3361 TAGCTCCTTC GGTCCTCCGA TCGTTGTCAG AAGTAAGTTG GCCGCAGTGT TATCACTCAT + 3421 GGTTATGGCA GCACTGCATA ATTCTCTTAC TGTCATGCCA TCCGTAAGAT GCTTTTCTGT + 3481 GACTGGTGAG TACTCAACCA AGTCATTCTG AGAATAGTGT ATGCGGCGAC CGAGTTGCTC + 3541 TTGCCCGGCG TCAATACGGG ATAATACCGC GCCACATAGC AGAACTTTAA AAGTGCTCAT + 3601 CATTGGAAAA CGTTCTTCGG GGCGAAAACT CTCAAGGATC TTACCGCTGT TGAGATCCAG + 3661 TTCGATGTAA CCCACTCGTG CACCCAACTG ATCTTCAGCA TCTTTTACTT TCACCAGCGT + 3721 TTCTGGGTGA GCAAAAACAG GAAGGCAAAA TGCCGCAAAA AAGGGAATAA GGGCGACACG + 3781 GAAATGTTGA ATACTCATAC TCTTCCTTTT TCAATATTAT TGAAGCATTT ATCAGGGTTA + 3841 TTGTCTCATG AGCGGATACA 
TATTTGAATG TATTTAGAAA AATAAACAAA TAGGGGTTCC + 3901 GCGCACATTT CCCCGAAAAG TGCCACCTGA CGTCTAAGAA ACCATTATTA TCATGACATT + 3961 AACCTATAAA AATAGGCGTA TCACGAGGCC GCCCCTGCAG CCGAATTATA TTATTTTTGC + 4021 CAAATAATTT TTAACAAAAG CTCTGAAGTC TTCTTCATTT AAATTCTTAG ATGATACTTC + 4081 ATCTGGAAAA TTGTCCCAAT TAGTAGCATC ACGCTGTGAG TAAGTTCTAA ACCATTTTTT + 4141 TATTGTTGTA TTATCTCTAA TCTTACTACT CGATGAGTTT TCGGTATTAT CTCTATTTTT + 4201 AACTTGGAGC AGGTTCCATT CATTGTTTTT TTCATCATAG TGAATAAAAT CAACTGCTTT + 4261 AACACTTGTG CCTGAACACC ATATCCATCC GGCGTAATAC GACTCACTAT AGGGAGAGCG + 4321 GCCGCCAGAT CTTCCGGATG GCTCGAGTTT TTCAGCAAGA T +// \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/BBa_E0040_GFP.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/BBa_E0040_GFP.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,85 @@ +LOCUS BBa_E0040_GFP 2875 bp DNA circular UNK 01-JAN-1980 +DEFINITION . +ACCESSION BBa_E0040_GFP +VERSION BBa_E0040_GFP +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..21 + /label="suffix" + misc_feature 22..93 + /label="E. 
coli his operon terminator" + stem_loop 30..64 + /label="terminator stem loop" + primer_bind 157..176 + /label="VR" + CDS 261..875 + /label="rep(pMB1) (rev)" + misc_feature 276 + /label="ORI" + CDS 1035..1895 + /label="ampR (rev)" + primer_bind 2018..2037 + /label="VF2" + misc_feature 2094..2097 + /label="end of terminator" + stem_loop 2107..2126 + /label="terminator stem loop" + misc_feature 2136..2157 + /label="prefix" + CDS 2156..2875 + /label="GFP protein" + misc_feature 2156..2875 + /label="BBa_E0040_GFP" +ORIGIN + 1 tactagtagc ggccgctgca gtccggcaaa aaagggcaag gtgtcaccac cctgcccttt + 61 ttctttaaaa ccgaaaagat tacttcgcgt tatgcaggct tcctcgctca ctgactcgct + 121 gcgctcggtc gttcggctgc ggcgagcggt atcagctcac tcaaaggcgg taatacggtt + 181 atccacagaa tcaggggata acgcaggaaa gaacatgtga gcaaaaggcc agcaaaaggc + 241 caggaaccgt aaaaaggccg cgttgctggc gtttttccac aggctccgcc cccctgacga + 301 gcatcacaaa aatcgacgct caagtcagag gtggcgaaac ccgacaggac tataaagata + 361 ccaggcgttt ccccctggaa gctccctcgt gcgctctcct gttccgaccc tgccgcttac + 421 cggatacctg tccgcctttc tcccttcggg aagcgtggcg ctttctcata gctcacgctg + 481 taggtatctc agttcggtgt aggtcgttcg ctccaagctg ggctgtgtgc acgaaccccc + 541 cgttcagccc gaccgctgcg ccttatccgg taactatcgt cttgagtcca acccggtaag + 601 acacgactta tcgccactgg cagcagccac tggtaacagg attagcagag cgaggtatgt + 661 aggcggtgct acagagttct tgaagtggtg gcctaactac ggctacacta gaagaacagt + 721 atttggtatc tgcgctctgc tgaagccagt taccttcgga aaaagagttg gtagctcttg + 781 atccggcaaa caaaccaccg ctggtagcgg tggttttttt gtttgcaagc agcagattac + 841 gcgcagaaaa aaaggatctc aagaagatcc tttgatcttt tctacggggt ctgacgctca + 901 gtggaacgaa aactcacgtt aagggatttt ggtcatgaga ttatcaaaaa ggatcttcac + 961 ctagatcctt ttaaattaaa aatgaagttt taaatcaatc taaagtatat atgagtaaac + 1021 ttggtctgac agttaccaat gcttaatcag tgaggcacct atctcagcga tctgtctatt + 1081 tcgttcatcc atagttgcct gactccccgt cgtgtagata actacgatac gggagggctt + 1141 accatctggc cccagtgctg caatgatacc gcgagaccca cgctcaccgg ctccagattt + 1201 atcagcaata aaccagccag 
ccggaagggc cgagcgcaga agtggtcctg caactttatc + 1261 cgcctccatc cagtctatta attgttgccg ggaagctaga gtaagtagtt cgccagttaa + 1321 tagtttgcgc aacgttgttg ccattgctac aggcatcgtg gtgtcacgct cgtcgtttgg + 1381 tatggcttca ttcagctccg gttcccaacg atcaaggcga gttacatgat cccccatgtt + 1441 gtgcaaaaaa gcggttagct ccttcggtcc tccgatcgtt gtcagaagta agttggccgc + 1501 agtgttatca ctcatggtta tggcagcact gcataattct cttactgtca tgccatccgt + 1561 aagatgcttt tctgtgactg gtgagtactc aaccaagtca ttctgagaat agtgtatgcg + 1621 gcgaccgagt tgctcttgcc cggcgtcaat acgggataat accgcgccac atagcagaac + 1681 tttaaaagtg ctcatcattg gaaaacgttc ttcggggcga aaactctcaa ggatcttacc + 1741 gctgttgaga tccagttcga tataacccac tcgtgcaccc aactgatctt cagcatcttt + 1801 tactttcacc agcgtttctg ggtgagcaaa aacaggaagg caaaatgccg caaaaaaggg + 1861 aataagggcg acacggaaat gttgaatact catactcttc ctttttcaat attattgaag + 1921 catttatcag ggttattgtc tcatgagcgg atacatattt gaatgtattt agaaaaataa + 1981 acaaataggg gttccgcgca catttccccg aaaagtgcca cctgacgtct aagaaaccat + 2041 tattatcatg acattaacct ataaaaatag gcgtatcacg aggcagaatt tcagataaaa + 2101 aaaatcctta gctttcgcta aggatgattt ctggaattcg cggccgcttc tagagatgcg + 2161 taaaggagaa gaacttttca ctggagttgt cccaattctt gttgaattag atggtgatgt + 2221 taatgggcac aaattttctg tcagtggaga gggtgaaggt gatgcaacat acggaaaact + 2281 tacccttaaa tttatttgca ctactggaaa actacctgtt ccatggccaa cacttgtcac + 2341 tactttcggt tatggtgttc aatgctttgc gagataccca gatcatatga aacagcatga + 2401 ctttttcaag agtgccatgc ccgaaggtta tgtacaggaa agaactatat ttttcaaaga + 2461 tgacgggaac tacaagacac gtgctgaagt caagtttgaa ggtgataccc ttgttaatag + 2521 aatcgagtta aaaggtattg attttaaaga agatggaaac attcttggac acaaattgga + 2581 atacaactat aactcacaca atgtatacat catggcagac aaacaaaaga atggaatcaa + 2641 agttaacttc aaaattagac acaacattga agatggaagc gttcaactag cagaccatta + 2701 tcaacaaaat actccaattg gcgatggccc tgtcctttta ccagacaacc attacctgtc + 2761 cacacaatct gccctttcga aagatcccaa cgaaaagaga gaccacatgg tccttcttga + 2821 gtttgtaaca gctgctggga ttacacatgg catggatgaa 
ctatacaaat aataa +// diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/BBa_J23119_prom.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/BBa_J23119_prom.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,72 @@ +LOCUS BBa_J23119_prom 2190 bp DNA circular UNK 01-JAN-1980 +DEFINITION . +ACCESSION BBa_J23119_prom +VERSION BBa_J23119_prom +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..21 + /label="suffix" + misc_feature 22..93 + /label="E. coli his operon terminator" + stem_loop 30..64 + /label="terminator stem loop" + primer_bind 157..176 + /label="VR" + CDS 261..875 + /label="rep(pMB1) (rev)" + misc_feature 276 + /label="ORI" + CDS 1035..1895 + /label="ampR (rev)" + primer_bind 2018..2037 + /label="VF2" + misc_feature 2094..2097 + /label="end of terminator" + stem_loop 2107..2126 + /label="terminator stem loop" + misc_feature 2136..2157 + /label="prefix" + misc_feature 2156..2190 + /label="BBa_J23119_prom" +ORIGIN + 1 tactagtagc ggccgctgca gtccggcaaa aaagggcaag gtgtcaccac cctgcccttt + 61 ttctttaaaa ccgaaaagat tacttcgcgt tatgcaggct tcctcgctca ctgactcgct + 121 gcgctcggtc gttcggctgc ggcgagcggt atcagctcac tcaaaggcgg taatacggtt + 181 atccacagaa tcaggggata acgcaggaaa gaacatgtga gcaaaaggcc agcaaaaggc + 241 caggaaccgt aaaaaggccg cgttgctggc gtttttccac aggctccgcc cccctgacga + 301 gcatcacaaa aatcgacgct caagtcagag gtggcgaaac ccgacaggac tataaagata + 361 ccaggcgttt ccccctggaa gctccctcgt gcgctctcct gttccgaccc tgccgcttac + 421 cggatacctg tccgcctttc tcccttcggg aagcgtggcg ctttctcata gctcacgctg + 481 taggtatctc agttcggtgt aggtcgttcg ctccaagctg ggctgtgtgc acgaaccccc + 541 cgttcagccc gaccgctgcg ccttatccgg taactatcgt cttgagtcca acccggtaag + 601 acacgactta tcgccactgg cagcagccac tggtaacagg attagcagag cgaggtatgt + 661 aggcggtgct acagagttct tgaagtggtg gcctaactac ggctacacta gaagaacagt + 721 atttggtatc tgcgctctgc tgaagccagt taccttcgga aaaagagttg gtagctcttg + 781 atccggcaaa caaaccaccg ctggtagcgg 
tggttttttt gtttgcaagc agcagattac + 841 gcgcagaaaa aaaggatctc aagaagatcc tttgatcttt tctacggggt ctgacgctca + 901 gtggaacgaa aactcacgtt aagggatttt ggtcatgaga ttatcaaaaa ggatcttcac + 961 ctagatcctt ttaaattaaa aatgaagttt taaatcaatc taaagtatat atgagtaaac + 1021 ttggtctgac agttaccaat gcttaatcag tgaggcacct atctcagcga tctgtctatt + 1081 tcgttcatcc atagttgcct gactccccgt cgtgtagata actacgatac gggagggctt + 1141 accatctggc cccagtgctg caatgatacc gcgagaccca cgctcaccgg ctccagattt + 1201 atcagcaata aaccagccag ccggaagggc cgagcgcaga agtggtcctg caactttatc + 1261 cgcctccatc cagtctatta attgttgccg ggaagctaga gtaagtagtt cgccagttaa + 1321 tagtttgcgc aacgttgttg ccattgctac aggcatcgtg gtgtcacgct cgtcgtttgg + 1381 tatggcttca ttcagctccg gttcccaacg atcaaggcga gttacatgat cccccatgtt + 1441 gtgcaaaaaa gcggttagct ccttcggtcc tccgatcgtt gtcagaagta agttggccgc + 1501 agtgttatca ctcatggtta tggcagcact gcataattct cttactgtca tgccatccgt + 1561 aagatgcttt tctgtgactg gtgagtactc aaccaagtca ttctgagaat agtgtatgcg + 1621 gcgaccgagt tgctcttgcc cggcgtcaat acgggataat accgcgccac atagcagaac + 1681 tttaaaagtg ctcatcattg gaaaacgttc ttcggggcga aaactctcaa ggatcttacc + 1741 gctgttgaga tccagttcga tataacccac tcgtgcaccc aactgatctt cagcatcttt + 1801 tactttcacc agcgtttctg ggtgagcaaa aacaggaagg caaaatgccg caaaaaaggg + 1861 aataagggcg acacggaaat gttgaatact catactcttc ctttttcaat attattgaag + 1921 catttatcag ggttattgtc tcatgagcgg atacatattt gaatgtattt agaaaaataa + 1981 acaaataggg gttccgcgca catttccccg aaaagtgcca cctgacgtct aagaaaccat + 2041 tattatcatg acattaacct ataaaaatag gcgtatcacg aggcagaatt tcagataaaa + 2101 aaaatcctta gctttcgcta aggatgattt ctggaattcg cggccgcttc tagagttgac + 2161 agctagctca gtcctaggta taatgctagc +// diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/BBa_J23119_prom_flawed.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/BBa_J23119_prom_flawed.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,72 @@ +LOCUS BBa_J23119_prom 2190 bp DNA circular UNK 01-JAN-1980 
+DEFINITION . +ACCESSION BBa_J23119_prom +VERSION BBa_J23119_prom +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..21 + /label="suffix" + misc_feature 22..93 + /label="E. coli his operon terminator" + stem_loop 30..64 + /label="terminator stem loop" + primer_bind 157..176 + /label="VR" + CDS 261..875 + /label="rep(pMB1) (rev)" + misc_feature 276 + /label="ORI" + CDS 1035..1895 + /label="ampR (rev)" + primer_bind 2018..2037 + /label="VF2" + misc_feature 2094..2097 + /label="end of terminator" + stem_loop 2107..2126 + /label="terminator stem loop" + misc_feature 2136..2157 + /label="prefix" + misc_feature 2156..2190 + /label="BBa_J23119_prom" +ORIGIN + 1 tactagtagc ggccgctgca gtccggcaaa aaagggcaag gtgtcaccac cctgcccttt + 61 ttctttaaaa ccgaaaagat tacttcgcgt tatgcaggct tcctcgctca ctgactcgct + 121 gcgctcggtc gttcggctgc ggcgagcggt atcagctcac tcaaaggcgg taatacggtt + 181 atccacagaa tcaggggata acgcaggaaa gaacatgtga gcaaaaggcc agcaaaaggc + 241 caggaaccgt aaaaaggccg cgttgctggc gtttttccac aggctccgcc cccctgacga + 301 gcatcacaaa aatcgacgct caagtcagag gtggcgaaac ccgacaggac tataaagata + 361 ccaggcgttt ccccctggaa gctccctcgt gcgctctcct gttccgaccc tgccgcttac + 421 cggatacctg tccgcctttc tcccttcggg aagcgtggcg ctttctcata gctcacgctg + 481 taggtatctc agttcggtgt aggtcgttcg ctccaagctg ggctgtgtgc acgaaccccc + 541 cgttcagccc gaccgctgcg ccttatccgg taactatcgt cttgagtcca acccggtaag + 601 acacgactta tcgccactgg cagcagccac tggtaacagg attagcagag cgaggtatgt + 661 aggcggtgct acagagttct tgaagtggtg gcctaactac ggctacacta gaagaacagt + 721 atttggtatc tgcgctctgc tgaagccagt taccttcgga aaaagagttg gtagctcttg + 781 atccggcaaa caaaccaccg ctggtagcgg tggttttttt gtttgcaagc agcagattac + 841 gcgcagaaaa aaaggatctc aagaagatcc tttgatcttt tctacggggt ctgacgctca + 901 gtggaacgaa aactcacgtt aagggatttt ggtcatgaga ttatcaaaaa ggatcttcac + 961 ctagatcctt ttaaattaaa aatgaagttt taaatcaatc taaagtatat atgagtaaac + 1021 ttggtctgac agttaccaat gcttaatcag tgaggcacct atctcagcga tctgtctatt + 1081 
tcgttcatcc atagttgcct gactccccgt cgtgtagata actacgatac gggagggctt + 1141 accatctggc cccagtgctg caatgatacc gcgagaccca cgctcaccgg ctccagattt + 1201 atcagcaata aaccagccag ccggaagggc cgagcgcaga agtggtcctg caactttatc + 1261 cgcctccatc cagtctatta attgttgccg ggaagctaga gtaagtagtt cgccagttaa + 1321 tagtttgcgc aacgttgttg ccattgctac aggcatcgtg gtgtcacgct cgtcgtttgg + 1381 tatggcttca ttcagctccg gttcccaacg atcaaggcga gttacatgat cccccatgtt + 1441 gtgcaaaaaa gcggttagct ccttcggtcc tccgatcgtt gtcagaagta agttggccgc + 1501 agtgttatca ctcatggtta tggcagcact gcataattct cttactgtca tgccatccgt + 1561 aagatgcttt tctgtgactg gtgagtactc aaccaagtca ttctgagaat agtgtatgcg + 1621 gcgaccgagt tgctcttgcc cggcgtcaat acgggataat accgcgccac atagcagaac + 1681 tttaaaagtg ctcatcattg gaaaacgttc ttcggggcga aaactctcaa ggatcttacc + 1741 gctgttgaga tccagttcga tataacccac tcgtgcaccc aactgatctt cagcatcttt + 1801 tactttcacc agcgtttctg ggtgagcaaa aacaggaagg caaaatgccg caaaaaaggg + 1861 aataagggcg acacggaaat gttgaatact catactcttc ctttttcaat attattgaag + 1921 catttatcag ggttattgtc tcatgagcgg atacatattt gaatgtattt agaaaaataa + 1981 acaaataggg gttccgcgca catttccccg aaaagtgcca cctgacgtct aagaaaccat + 2041 tattatcatg acattaacct ataaaaatag gcgtatcacg aggcagaatt tcagataaaa + 2101 aaaatcctta gctttcgcta aggatgattt ctggatttcg cggccgcttc tagagttgac + 2161 agctagctca gtcctaggta taatgctagc +// diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/BBa_J61048_term.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/BBa_J61048_term.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,73 @@ +LOCUS BBa_J61048_term 2268 bp DNA circular UNK 01-JAN-1980 +DEFINITION . +ACCESSION BBa_J61048_term +VERSION BBa_J61048_term +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..21 + /label="suffix" + misc_feature 22..93 + /label="E. 
coli his operon terminator" + stem_loop 30..64 + /label="terminator stem loop" + primer_bind 157..176 + /label="VR" + CDS 261..875 + /label="rep(pMB1) (rev)" + misc_feature 276 + /label="ORI" + CDS 1035..1895 + /label="ampR (rev)" + primer_bind 2018..2037 + /label="VF2" + misc_feature 2094..2097 + /label="end of terminator" + stem_loop 2107..2126 + /label="terminator stem loop" + misc_feature 2136..2157 + /label="prefix" + misc_feature 2156..2268 + /label="BBa_J61048_term" +ORIGIN + 1 tactagtagc ggccgctgca gtccggcaaa aaagggcaag gtgtcaccac cctgcccttt + 61 ttctttaaaa ccgaaaagat tacttcgcgt tatgcaggct tcctcgctca ctgactcgct + 121 gcgctcggtc gttcggctgc ggcgagcggt atcagctcac tcaaaggcgg taatacggtt + 181 atccacagaa tcaggggata acgcaggaaa gaacatgtga gcaaaaggcc agcaaaaggc + 241 caggaaccgt aaaaaggccg cgttgctggc gtttttccac aggctccgcc cccctgacga + 301 gcatcacaaa aatcgacgct caagtcagag gtggcgaaac ccgacaggac tataaagata + 361 ccaggcgttt ccccctggaa gctccctcgt gcgctctcct gttccgaccc tgccgcttac + 421 cggatacctg tccgcctttc tcccttcggg aagcgtggcg ctttctcata gctcacgctg + 481 taggtatctc agttcggtgt aggtcgttcg ctccaagctg ggctgtgtgc acgaaccccc + 541 cgttcagccc gaccgctgcg ccttatccgg taactatcgt cttgagtcca acccggtaag + 601 acacgactta tcgccactgg cagcagccac tggtaacagg attagcagag cgaggtatgt + 661 aggcggtgct acagagttct tgaagtggtg gcctaactac ggctacacta gaagaacagt + 721 atttggtatc tgcgctctgc tgaagccagt taccttcgga aaaagagttg gtagctcttg + 781 atccggcaaa caaaccaccg ctggtagcgg tggttttttt gtttgcaagc agcagattac + 841 gcgcagaaaa aaaggatctc aagaagatcc tttgatcttt tctacggggt ctgacgctca + 901 gtggaacgaa aactcacgtt aagggatttt ggtcatgaga ttatcaaaaa ggatcttcac + 961 ctagatcctt ttaaattaaa aatgaagttt taaatcaatc taaagtatat atgagtaaac + 1021 ttggtctgac agttaccaat gcttaatcag tgaggcacct atctcagcga tctgtctatt + 1081 tcgttcatcc atagttgcct gactccccgt cgtgtagata actacgatac gggagggctt + 1141 accatctggc cccagtgctg caatgatacc gcgagaccca cgctcaccgg ctccagattt + 1201 atcagcaata aaccagccag ccggaagggc cgagcgcaga agtggtcctg caactttatc 
+ 1261 cgcctccatc cagtctatta attgttgccg ggaagctaga gtaagtagtt cgccagttaa + 1321 tagtttgcgc aacgttgttg ccattgctac aggcatcgtg gtgtcacgct cgtcgtttgg + 1381 tatggcttca ttcagctccg gttcccaacg atcaaggcga gttacatgat cccccatgtt + 1441 gtgcaaaaaa gcggttagct ccttcggtcc tccgatcgtt gtcagaagta agttggccgc + 1501 agtgttatca ctcatggtta tggcagcact gcataattct cttactgtca tgccatccgt + 1561 aagatgcttt tctgtgactg gtgagtactc aaccaagtca ttctgagaat agtgtatgcg + 1621 gcgaccgagt tgctcttgcc cggcgtcaat acgggataat accgcgccac atagcagaac + 1681 tttaaaagtg ctcatcattg gaaaacgttc ttcggggcga aaactctcaa ggatcttacc + 1741 gctgttgaga tccagttcga tataacccac tcgtgcaccc aactgatctt cagcatcttt + 1801 tactttcacc agcgtttctg ggtgagcaaa aacaggaagg caaaatgccg caaaaaaggg + 1861 aataagggcg acacggaaat gttgaatact catactcttc ctttttcaat attattgaag + 1921 catttatcag ggttattgtc tcatgagcgg atacatattt gaatgtattt agaaaaataa + 1981 acaaataggg gttccgcgca catttccccg aaaagtgcca cctgacgtct aagaaaccat + 2041 tattatcatg acattaacct ataaaaatag gcgtatcacg aggcagaatt tcagataaaa + 2101 aaaatcctta gctttcgcta aggatgattt ctggaattcg cggccgcttc tagagccggc + 2161 ttatcggtca gtttcacctg atttacgtaa aaacccgctt cggcgggttt ttgcttttgg + 2221 aggggcagaa agatgaatga ctgtccacga cgctataccc aaaagaaa +// diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/BBa_K611094_bar.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/BBa_K611094_bar.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,74 @@ +LOCUS BBa_K611094_br 2180 bp DNA circular UNK 01-JAN-1980 +DEFINITION . +ACCESSION BBa_K611094_br +VERSION BBa_K611094_br +KEYWORDS . +SOURCE . + ORGANISM . + . +FEATURES Location/Qualifiers + misc_feature 1..21 + /label="suffix" + misc_feature 22..93 + /label="E. 
coli his operon terminator" + stem_loop 30..64 + /label="terminator stem loop" + primer_bind 157..176 + /label="VR" + CDS 261..875 + /label="rep(pMB1) (rev)" + misc_feature 276 + /label="ORI" + CDS 1035..1895 + /label="ampR (rev)" + primer_bind 2018..2037 + /label="VF2" + misc_feature 2094..2097 + /label="end of terminator" + stem_loop 2107..2126 + /label="terminator stem loop" + misc_feature 2136..2157 + /label="prefix" + misc_feature 2156..2180 + /label="Barcode" + misc_feature 2156..2180 + /label="BBa_K611094_br" +ORIGIN + 1 tactagtagc ggccgctgca gtccggcaaa aaagggcaag gtgtcaccac cctgcccttt + 61 ttctttaaaa ccgaaaagat tacttcgcgt tatgcaggct tcctcgctca ctgactcgct + 121 gcgctcggtc gttcggctgc ggcgagcggt atcagctcac tcaaaggcgg taatacggtt + 181 atccacagaa tcaggggata acgcaggaaa gaacatgtga gcaaaaggcc agcaaaaggc + 241 caggaaccgt aaaaaggccg cgttgctggc gtttttccac aggctccgcc cccctgacga + 301 gcatcacaaa aatcgacgct caagtcagag gtggcgaaac ccgacaggac tataaagata + 361 ccaggcgttt ccccctggaa gctccctcgt gcgctctcct gttccgaccc tgccgcttac + 421 cggatacctg tccgcctttc tcccttcggg aagcgtggcg ctttctcata gctcacgctg + 481 taggtatctc agttcggtgt aggtcgttcg ctccaagctg ggctgtgtgc acgaaccccc + 541 cgttcagccc gaccgctgcg ccttatccgg taactatcgt cttgagtcca acccggtaag + 601 acacgactta tcgccactgg cagcagccac tggtaacagg attagcagag cgaggtatgt + 661 aggcggtgct acagagttct tgaagtggtg gcctaactac ggctacacta gaagaacagt + 721 atttggtatc tgcgctctgc tgaagccagt taccttcgga aaaagagttg gtagctcttg + 781 atccggcaaa caaaccaccg ctggtagcgg tggttttttt gtttgcaagc agcagattac + 841 gcgcagaaaa aaaggatctc aagaagatcc tttgatcttt tctacggggt ctgacgctca + 901 gtggaacgaa aactcacgtt aagggatttt ggtcatgaga ttatcaaaaa ggatcttcac + 961 ctagatcctt ttaaattaaa aatgaagttt taaatcaatc taaagtatat atgagtaaac + 1021 ttggtctgac agttaccaat gcttaatcag tgaggcacct atctcagcga tctgtctatt + 1081 tcgttcatcc atagttgcct gactccccgt cgtgtagata actacgatac gggagggctt + 1141 accatctggc cccagtgctg caatgatacc gcgagaccca cgctcaccgg ctccagattt + 1201 atcagcaata aaccagccag 
ccggaagggc cgagcgcaga agtggtcctg caactttatc + 1261 cgcctccatc cagtctatta attgttgccg ggaagctaga gtaagtagtt cgccagttaa + 1321 tagtttgcgc aacgttgttg ccattgctac aggcatcgtg gtgtcacgct cgtcgtttgg + 1381 tatggcttca ttcagctccg gttcccaacg atcaaggcga gttacatgat cccccatgtt + 1441 gtgcaaaaaa gcggttagct ccttcggtcc tccgatcgtt gtcagaagta agttggccgc + 1501 agtgttatca ctcatggtta tggcagcact gcataattct cttactgtca tgccatccgt + 1561 aagatgcttt tctgtgactg gtgagtactc aaccaagtca ttctgagaat agtgtatgcg + 1621 gcgaccgagt tgctcttgcc cggcgtcaat acgggataat accgcgccac atagcagaac + 1681 tttaaaagtg ctcatcattg gaaaacgttc ttcggggcga aaactctcaa ggatcttacc + 1741 gctgttgaga tccagttcga tataacccac tcgtgcaccc aactgatctt cagcatcttt + 1801 tactttcacc agcgtttctg ggtgagcaaa aacaggaagg caaaatgccg caaaaaaggg + 1861 aataagggcg acacggaaat gttgaatact catactcttc ctttttcaat attattgaag + 1921 catttatcag ggttattgtc tcatgagcgg atacatattt gaatgtattt agaaaaataa + 1981 acaaataggg gttccgcgca catttccccg aaaagtgcca cctgacgtct aagaaaccat + 2041 tattatcatg acattaacct ataaaaatag gcgtatcacg aggcagaatt tcagataaaa + 2101 aaaatcctta gctttcgcta aggatgattt ctggaattcg cggccgcttc tagagcgctg + 2161 atagtgctag tgtagatcgc +// diff -r 000000000000 -r dc450979fcd4 test-data/BioBrick_assembly_dataset/hierarchical_biobrick.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/BioBrick_assembly_dataset/hierarchical_biobrick.csv Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,4 @@ +construct,parts, +C1_prom_barcode,BBa_K611094_bar,BBa_J23119_prom +C2_gfp_term,BBa_E0040_GFP,BBa_J61048_term +C3_final,C1_prom_barcode,C2_gfp_term diff -r 000000000000 -r dc450979fcd4 test-data/Gibson_assembly_dataset/gibson_assembly.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Gibson_assembly_dataset/gibson_assembly.csv Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,6 @@ +construct, parts,,,, +construct_1,Frag_1,Frag_2,Frag_3,Frag_4,Frag_5 +construct_2,Frag_1,Frag_2,Frag_3,Frag_4b,Frag_5 +construct_3,Frag_1,Frag_2,Frag_3,Frag_4c,Frag_5b 
+construct_4,Frag_1,Frag_2,Frag_3,Frag_4, +construct_5,Frag_1,Frag_2,Frag_3,Frag_4,Frag_4b diff -r 000000000000 -r dc450979fcd4 test-data/Gibson_assembly_dataset/gibson_sequences.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Gibson_assembly_dataset/gibson_sequences.fa Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,347 @@ +>expected_sequence +CTCCACCTGCGTCTATCGTACAGTGCTAAAATGGCAGCAGATAGTGAAACTTCCGCTAAG +CTAGCCCCTCAGGGTACACTGCACCGAGGCGTGCTGTCAATACTTGATTAAGTCGGGTTG +TCGGGGACCTGCCGTCACGCTTCCGAGTGTATATCCGGATTTAGTTGACGTCATACAGAG +GCACTAAGAAGAATAAACGCTTACCTCCAGCAATCGTGTAGTGTCAGGTGTACGTTCTCC +CTTGCGCCGTTCGGCAAGCGTCCGGTGTCGGGCTGCAAGGAATAAATCTTTATGGACCAG +AGGGGCTTGTTTCCTCATATGGGTGCGTGCACTTATACGATTCAAAGGTGGATATGGCCG +CATAACACGTAGCCAGGCTATAGTCCCGCGGCCTAATTCCTTCGAGTGCGGGTGCCTGTT +TTTGTTTTTCCTTTACGACACGAACCGCTCTAACCTGCTCTATTTCGCCACGTTCCAGTG +AACCTCTTAGCCTACCGCCCACGTACGGTGGGACGCGTCGAGCAGTTAAGGTACTGTGGA +GAAATCGTTCAATATTAGAAAACAGGCGGTGTACGAATTACTGTGTCCCGGTGTTGCCCG +TTTAACGGCTGCCGTGGTCATACCGTGAGGCACCACGAGGGGATGCTACGCAACATGCGA +GGTGTAATCAGCAGGGAAGATCCCGGGGATCGAAAGCGGTCCGCGATTTGCGACCGATAT +GCATAAGGTGTCATTTATATTACACCTACGACACTGGTACCGGCTCACAGCCAAATGCAC +AGTCTCAAGATAGAATTCGCAAAAATTAGGCTTATCGGGCGCTTACTCTTTGTTACACTT +CTGGTCTGTGAGTGACGCCCTGTGTCCCATCACTGCATGTGAGGATGCGTGTACTGCACC +TGTGAATTACAGACGTTTCCCTACCAGGGCGCTACGTTATAATGTTCGGTTGCAACCCTC +TATAGGGTGATCTCGACATACCACTATGGTTTGCGCGTCTCGGGTGCTTTTGGCGAGGGC +CAGGTACGGCGGTGAGACAGCGTATTTTGCGAATAGGGTATTCGGACGCACGGTGTATAG +CTCTTGGAAAACGACTCTTCGACGGGCGTGTAGAAGTAGCCAAGCAGTTCGGTTGTTATC +ACTAAATGCGGACCATGTGCAGCCGTTTAGGAGAGGAGTCTGCAATGGATCAGCTGGTCA +GATTGTACCACTCGGTTGAGCCTAAGGCACTTCAGGTGCTCGGATTGTAATGAACCCTCA +GGTTACGCCGTTGCGCTGAGATCCCACAAGCCATAAGCAATGAGAATCGAGCAGCTATCG +AATTACGCAGCACTTGCTGATGAACTGGGTGCACCTAAAGCGCCCGATAAAACGGAGGGA +GTGAGGTAACTACTACATTGCGAGTAGTGCGAGGGGCCTCCGAGACTACATTAGAGCAGC +CATGCCGAATACGACAAAATCTATAGCCTTGCGAACGTAGGGGACTGATTTATGGACAAC +AGGACCGAAACAAGTTGGAGCCACAACGGAGCTATGGACGTTTTCACTATTGATGGGTGG 
+CTTGCGTCCGCATCCCAGGTTTCTCTGGTTAATTTTCATGACGATTCTTTGTGGTACGAT +CCAGGCATAAGATAGGGCACCTAACCCGATACTGTGAACTTATATGACGGCTGAACATGC +CCTAATAGTCTGCCAAAAGGGAAACCTATAACGGCTCTAAACGGGCCCCGGGTACGTAAC +AGCAATGTTCCTGTCCACCAAGGCGTTTTTAGTAACCGCCGATCGATCTCGATCGAATTC +CGTTTTACGCCCCAGATCGCAGCTCCCTTGACAGTGTCAGTTGAATCCCCATTCTAGGTT +TCCTGATAAAACTAAGTACGGGGACGCATCAGTCGAAAGCAATCACAGATAATAGATATC +TTGAGCAGAAAGACGTCTTCACGAATCAGGAGCGAGTGTCCCCTTTGAAAGTGCGGGCAA +CAATAGGTGTGAGGTTTCACGGTTGCTTGATCAGTAGCTGTTCACTCTCTCGCAGGCCTA +CATAGTGAGGTGGTGAGATTGTTCAGGTCTGTCTCCTGCGCGGTTGTCGAGCCCCGTCGG +GCACGCTAACAAGGACAGGAATTCGGGTCGTCAAATAACAATTTACGTCATCGCATCCCT +CAAGGGTAAGGCGTGATCTCCATCTCGGTTCCCAATTGGTCATTGTGATAGCCACACTGC +AGTTACCGAAGGCGATTTTATACTACCCTGATAAGTGCTCGCAGCCCCTCCTCCCTAATA +ATGTAACGACGAGCATTAACCCAGGAGTATTTCTCACGTTGCTCGTGCCCAGCCAAGGCG +ATCAACGCAGACCGGATAGCGTTAGCTAGCCCACGGGTAAAAGCGATTGTTGTACTCTTT +CATATGAATACAGCACTCGCGAATGCAAGCCTACGTCGGCTCGATTCTCTTCCTCACCAC +ACTGGAGGCGATGATCTAGCCCGTCTTTCACCCCGTACCTTTATACTCGGCCGCGTATTT +CTCCACCGGGAGCTCTGCCTACCAAAGCGTCGTACGCCCAGGAAGCCGTTTCCTTTATCC +TCCCTCATGTCTCAACGCGCGAGCATCGGAGAGTACGCGAAATCGACTGTCTGGCCAACA +CTGGACAGACGCGCGAGACGCTGATAAACCCCCATTTTCGTGGGGTTAGTAAATCGGTGG +CTTCAAACGGTTCGATCGCTCGCACGAGCATTAGTCAGGAGTATGATTGCGCATCATCTC +TACTCCGTACTAGGTTCTGTAGCTCAGGATCTCCGGGGGGATCAAGCTTTTATGTCCCGC +GGGATTAGAAGATATCCATCCATATGTTTACTTTGATGCGGGTCCGAGCTCTCTATGATG +CTGCTTTGACAAAATGTGGAAAGCCTCAGCGACCGCTCTGGGGTCAAAGCTCTACGACAC +GTTGATGCTAAACAAGTTGGACTACTAAGCAAATGAAGCGGAAGAGATACTGAGCGCTCT +CTCTATGACTTTCGCTATATGGCGACGTGCGAAAAACGCGATTTGGCCGAAATCTCAAAT +AATAATGCAATGTGTTGGCTAAGCTCGGGCAAACTCAGGATGTGACCTAGTACCCTCTGC +CCCGTGAGTACCCATACCCAATCAAGCCAGCCCAATTCGCGATAGGTACAGATAAGCCTT +ACTCGTCCCGATCCTTGACTAAAACACTCATCCTCGATAGTACGTGCCTTGTAGAGTCCG +ACGGCCATAGGTAAACGACCAGTTAGTGTCGTAGGGTCCTCCACTTTAGCTACGATTGCA +ACTCCCTTACCTTCGTGCCGGCGCGCTCCTCTTGCTACCATCACGCTTCAAATCGGGTGG +AAGACCAACTGATGAATGAGGGTGCAGACCCCAGTGGAGCATAATACGATGCGCCCACGA +TACTCTAGAGCCACGCGTCGACGTTCGTAAGATGTCCCGGGCCCGTGGGTGGCCACATCG 
+GCCGCCCATCCTTGTGTCTTCCGATAAGTACTCCCTTTCAAGGACTCTCCGATACCGGCT +TAGAACACGAACGGACAACGCTTTCAGCGCTGAGAAATGACGGCTGCTTCAACCAGAACG +ATACAGCCTTCTTTGCTAGATAGCCTCTAGTGTCAGCCACCAAGTCCGCCACCTAATCCT +TCGCCGAGGGTCAAAGCTTCTGACCATCCAATTGTTAGGCACTCTTGGAACCTTGCAAGT +ACCTGATCATGCAGAAAATGGACTTGCCCGGAGCATATTCATTATAAGTACCATTCACGG +CAGGTTATAGTATGCGTTGACGCGGGGTGATACGGTAATGCAAGTAACCATCGGGCCGCG +GTATGCACATAGAAGGGTGTTTGACTTGGTTTCTTACAGTGCAACTGCATATGTGGACCG +CCATTCCAAGTGAAAGGATACGAAGGGAGGTTTCGGACTGCTATCCAGAGCATGGAAAGC +CTTCAAGGTGGTGCGCACTGGAATAGTGACTCGGGGATTGTATGTCGAGCCGCGTCCAGT +CTTTCCTATACTTATGCGAGCAAATAGACCGAGTGGAACTTGATTGCATCAGCCGAATCA +CGATGAAAACTCAGCTTACAATTCGTAAGCACTTCAAATATATGCCCGGAAAATCGCCGT +CCAGTGACTTGTAATAGCTCCAGGGTCAGTAGCGCTGCCCTGGCCGAACCAGGTGACGGT +TACGTGGACCTCACAGTATCAATGTTGCAGTGCGCCACCAGATTAAGCTGGCCGTGTCAT +GGTGAACAACCGTGATAAAAGAACGGGAGTATGAGAGTGCCACCCAAGAGTGGTACTCCA +GGATTCGGAAAGGGGAGGGGGCTTTTCCTGCTTAACAACCTTCCTCGGCACGGTCTAAGC +GTCTCACTCGTCCTAAACTCTACCATCATGCAGCTAGGCGAAAGTATAACTTTAGGGTCT +ATCAGGATGAGCTATCTGTACCACGGTGATGAGATAAGTAGGGGGTGCGCTAGGGTCGTT +CGTGTTTTCATCGTCTATAGTACCTATTTCTCGTCTTCCATTAGCGCATACGGCCATCTC +GCGGCGGCTACGTCAGCCAGCGTCAACCCCAAGTCACTAATTTCCGTTATTATAGGTGTA +GCTCATAACTTCTTCTGCCGACCAAGCCAAAAGCTTTGCGGCAGCTGCAAGGCCACGTTA +GGCTCTCGGACGGCAGATATTTATTAATAACACCACTGGGTTCCAGCTCTTAACGCACTT +CCCTTCACTGGATCACCTGTACGCCCATAAGCTAAGCCTGAAGCCACGGCCTCAATCGTG +GACATTACTCTTAGCAAATCCACATTACCATGTCACCAATCCCCGAACCCCGCGCCGACC +TAAGGCGGGGTACCTCGTGCACGGCAGCATCGAGGGAAATATATTACAAAAGACGCGGCG +ATCCAACGGTTGATAAAAACAGTTCCGTGGAAGGCGGACGTGTGCTTAGCGCATGCTTCT +CACCGAGTGACAGCACCGATTCCATCTCGCTAGCACGCCGATGAGCCTCGCTGTGGGACT +ATGGTCCAGCCTATGCTTAACTAGGCAGAACAGAGAGTATGGTGGTCCTCGGTAAGGTCG +ATTTGGTGGCACTCGCAAGATACGCGGGTTATTAGTAGAGCGCAGTCAGGGCCCATCGCC +AGCATTTGCATGCTCGCCGTACGCTATCCTTGTACTACGGTTCCTCAGTGGCTATAAAGG +AATGTCACAACTCCATTAGCCCCTTTTGTTCAGCGCTTAGTCATGGTGGGACTGACTCAC +CGAGACTACATACGGATTGTGCCCGCAGAGACATGCCTTTCATGCTGGGGTTTTGTAAGC +AGCTCCACGGGGTAGTGTCACCATGCCTTCGACCTATGCTTATGAAACCCGAGGTAATGC 
+GGCAGGATCCCGTGGATCCTACGCTTAGCCTGTTTAAGCCATCGTTCTCCTCGGCCCCCA +GCCAGCGCGTATTCCGCGCATTCCTCCGAGTGCGTATCGCGCAAACCACAAGATACCTCT +CAACGACTAGCGGTGAACCACCAAGCTTGACATACGTGCCCCTTCGGAAACGTTATTAGT +GGTCCTGAGTGGTACGTCCCGAGTATACGGGGCAGCGATAGTAACGCTACAAATAGCTAG +GCGAGGTTGCGATACCTGAATGAATATCCAACAGTGGCAACGAGACGCTTAGAAAGGGAA +ACTACATACCAGTTCAATCCGGGATGACTTCAAAAGGGTATGTTTGTTTTGAAGCGAAGC +TTTGGTTGTCATTCAGTTCCTAGAGTTTACGGCGCTGATATGGCTCTAGGAGTCCAGTGA +ATTCGTCAGCATAAGCCCCACATCGTAGGAAATTGCCCGGTCGTCGGTCTAGCAAATGCT +CGGTAAACTGGGAGTGTACTAACACCATGTCGTGGTAACCCGGTGGACCGGCTACTAGAG +ACATATTTATTCATGCTCAGTTGAACCTGTTTCGAGGGGGCCGAGTTAACATAAAGTGAT +TAGTCACAGCGCATTAAGATAGAACAGGGATATAGCTCGTTGCATCCATAAATTGCCCAC +CGTAGGGAGAACGCCGTTGGAAAATGAGAGTTAGGCGATGAGCGGTTGTCGCCCGCACAG +ACAAAGGATGCCTTTATAAATAGTAGCGGCCTTGTCTGCACCAGACGTTTGTGGGTCGAC +GTGCTATTCTAGCCAAAAGCAAAATGTATCATACTACAGGCAATAGGCCCTATGCTGACC +AAACTTGGGGACCCTTGCGCCTTCTCTCTTACGCATAACATGCATAGGTACCTCTACTCA +TCAGGCAGGGCGGCGTGAGGTCACTATGGCTCAAGATGTGTACGACTAAAGAAAGGTTTA +TGCTCCTTCCCCAAGGACGCATTTGGGACTGCTACTTGCCCCTAGCGAATTCACTAGGAT +TTTTGTAGAACCATGAGCGCCCTATCCGATAGCACAGAGACAATGCTACAAGCAACTGTG +CATGCGCTCGATCGCCGTGCATTAATACGTATTATAGCGTATCGTGTACGCTAATATCTT +AGTGCACCGCACGCTGGTTGGATACAATTCCGTGAAATAATTCCTGCTTACACAGGGGCT +TTCTGGCGCATGGCTGTGTCTGGATGTTTGTGATAAGAGGCTCCATGAACCCGGCGGGAA +ATGAGGGGAAACCCCGGGGAGCAACGACACTAAGCCTGGCAGTTGTTCAGATAGGACGCT +TTTGTCAGTGGGCGGTTTTGCATCCACTAACTATCATAAACGACACAACGTCGAATGATC +ATCGGCGGTTTGCATGTAGAGACAGCTCCTGCAACACCATACCGGCTCGTGGAATTACGC +TGTGCGCCGGCCCTCATTCGATTGTGTCCAGCGTCGGCGCGAATGATGATTGAGATGTGC +TTCGGCTCAGAGCGTGCACAGGTAGTTCTCCCCCCTTACCTAGCTGATCCAGCAATTCCA +CGCTGGCCCGCGCGTCCTTGGGTAACACTGGATCTGTACCCAAATACACCGTCGTGATTG +TTAACTACCGTATGGCGGTTACGATCGAGTGACACTATTATTAGACCGCTACTCCAAGTC +CCAACTTTTCATCGCCATACAGAAGAGAACGAGAAAGTCGAAGAGTTATAGCGTGTACAC +TTCTGATTAGCTAACGAATCGATGCGTACAGCATACTCGAGCAACTTTATGGGTCGTCTG +TCTCCTTGTACCCCGCTTGACGAGGATGTGTTTGTAGGCGGCTTGGATATCGGCAGTGTA +TCCGCATGGTCAAACACTTTAGCCCGACCGGTGCTCAAGGTCGAAATCCTAATATAGATT 
+GGGATAGGACCCGTCCATTACTAGGCGACATTCGCAGTCACGAGCCCGCACGGGGTGCCT +CAACACGGTGACACTCAATTCGCCGGCACTGAGTACGACGATCGTGATTGCCATACCGTC +ATGTTTTGTGTTACCCCTGATAAGAAGCGCAGATATGGCACGGGGGCAGAGGGCGGAGTA +GACCCATGCAAAACTCTAGCCTCTGGGCAGCATGATCGACGTAGAACGCCGTTGTGACTC +ACTAGTTTCAGTTATGATGTTCAGCCAAGAATCCTCCATTTTGACGTGAACCTTCTCCCT +TTACACCTTGAAGCTGCAATATTAAGTCGACCCTTTAAGTTGCCTTGTCTTGGTCTAGAG +CCCGCAGCTCACCGATAAAATGAGCAGTCAATTTTAGCGGCGGTTGTCCAGGTTACATGC +ACTTTGGGCTGGAACCGCGTTACGTTTGGAGTACTTAGAAGTGTCGTAGGCATAGGAGAG +CCGAAACTTGATATTGGGTCTAGTTAGTGCTCTACCCCGCTTCGGATCCGTAATAGTGAT +GTTGCATGAGACGCTTGCCTCAAGGCGACATACGGTGGCGGTTGCTGGAAATGACGCCCC +ACCCCGTGATAATTTGGGGACGAGCCGCACGATAACTTCTGCCAGCGACCGCCCCTCCCA +CGTCCGATGGTGATTCCGAA +>Frag_1 +CTCCACCTGCGTCTATCGTACAGTGCTAAAATGGCAGCAGATAGTGAAACTTCCGCTAAG +CTAGCCCCTCAGGGTACACTGCACCGAGGCGTGCTGTCAATACTTGATTAAGTCGGGTTG +TCGGGGACCTGCCGTCACGCTTCCGAGTGTATATCCGGATTTAGTTGACGTCATACAGAG +GCACTAAGAAGAATAAACGCTTACCTCCAGCAATCGTGTAGTGTCAGGTGTACGTTCTCC +CTTGCGCCGTTCGGCAAGCGTCCGGTGTCGGGCTGCAAGGAATAAATCTTTATGGACCAG +AGGGGCTTGTTTCCTCATATGGGTGCGTGCACTTATACGATTCAAAGGTGGATATGGCCG +CATAACACGTAGCCAGGCTATAGTCCCGCGGCCTAATTCCTTCGAGTGCGGGTGCCTGTT +TTTGTTTTTCCTTTACGACACGAACCGCTCTAACCTGCTCTATTTCGCCACGTTCCAGTG +AACCTCTTAGCCTACCGCCCACGTACGGTGGGACGCGTCGAGCAGTTAAGGTACTGTGGA +GAAATCGTTCAATATTAGAAAACAGGCGGTGTACGAATTACTGTGTCCCGGTGTTGCCCG +TTTAACGGCTGCCGTGGTCATACCGTGAGGCACCACGAGGGGATGCTACGCAACATGCGA +GGTGTAATCAGCAGGGAAGATCCCGGGGATCGAAAGCGGTCCGCGATTTGCGACCGATAT +GCATAAGGTGTCATTTATATTACACCTACGACACTGGTACCGGCTCACAGCCAAATGCAC +AGTCTCAAGATAGAATTCGCAAAAATTAGGCTTATCGGGCGCTTACTCTTTGTTACACTT +CTGGTCTGTGAGTGACGCCCTGTGTCCCATCACTGCATGTGAGGATGCGTGTACTGCACC +TGTGAATTACAGACGTTTCCCTACCAGGGCGCTACGTTATAATGTTCGGTTGCAACCCTC +TATAGGGTGATCTCGACATACCACTATGGTTTGCGCGTCTCGGGTGCTTTTGGCGAGGGC +CAGGTACGGCGGTGAGACAGCGTATTTTGCGAATAGGGTATTCGGACGCACGGTGTATAG +CTCTTGGAAAACGACTCTTCGACGGGCGTGTAGAAGTAGCCAAGCAGTTCGGTTGTTATC +ACTAAATGCGGACCATGTGCAGCCGTTTAGGAGAGGAGTCTGCAATGGATCAGCTGGTCA 
+GATTGTACCACTCGGTTGAGCCTAAGGCACTTCAGGTGCTCGGATTGTAATGAACCCTCA +GGTTACGCCGTTGCGCTGAGATCCCACAAGCCATAAGCAATGAGAATCGAGCAGCTATCG +AATTACGCAGCACTTGCTGATGAACTGGGTGCACCTAAAGCGCCCGATAAAACGGAGGGA +GTGAGGTAACTACTACATTGCGAGTAGTGCGAGGGGCCTCCGAGACTACATTAGAGCAGC +CATGCCGAATACGACAAAATCTATAGCCTTGCGAACGTAGGGGACTGATTTATGGACAAC +AGGACCGAAACAAGTTGGAGCCACAACGGAGCTATGGACGTTTTCACTATTGATGGGTGG +CTTGCGTCCGCATCCCAGGTTTCTCTGGTTAATTTTCATGACGATTCTTTGTGGTACGAT +CCAGGCATAAGATAGGGCACCTAACCCGATACTGTGAACTTATATGACGGCTGAACATGC +CCTAATAGTCTGCCAAAAGGGAAACCTATAACGGCTCTAAACGGGCCCCGGGTACGTAAC +AGCAATGTTCCTGTCCACCAAGGCGTTTTTAGTAACCGCCGATCGATCTCGATCGAATTC +CGTTTTACGC +>Frag_2 +GATCGAATTCCGTTTTACGCCCCAGATCGCAGCTCCCTTGACAGTGTCAGTTGAATCCCC +ATTCTAGGTTTCCTGATAAAACTAAGTACGGGGACGCATCAGTCGAAAGCAATCACAGAT +AATAGATATCTTGAGCAGAAAGACGTCTTCACGAATCAGGAGCGAGTGTCCCCTTTGAAA +GTGCGGGCAACAATAGGTGTGAGGTTTCACGGTTGCTTGATCAGTAGCTGTTCACTCTCT +CGCAGGCCTACATAGTGAGGTGGTGAGATTGTTCAGGTCTGTCTCCTGCGCGGTTGTCGA +GCCCCGTCGGGCACGCTAACAAGGACAGGAATTCGGGTCGTCAAATAACAATTTACGTCA +TCGCATCCCTCAAGGGTAAGGCGTGATCTCCATCTCGGTTCCCAATTGGTCATTGTGATA +GCCACACTGCAGTTACCGAAGGCGATTTTATACTACCCTGATAAGTGCTCGCAGCCCCTC +CTCCCTAATAATGTAACGACGAGCATTAACCCAGGAGTATTTCTCACGTTGCTCGTGCCC +AGCCAAGGCGATCAACGCAGACCGGATAGCGTTAGCTAGCCCACGGGTAAAAGCGATTGT +TGTACTCTTTCATATGAATACAGCACTCGCGAATGCAAGCCTACGTCGGCTCGATTCTCT +TCCTCACCACACTGGAGGCGATGATCTAGCCCGTCTTTCACCCCGTACCTTTATACTCGG +CCGCGTATTTCTCCACCGGGAGCTCTGCCTACCAAAGCGTCGTACGCCCAGGAAGCCGTT +TCCTTTATCCTCCCTCATGTCTCAACGCGCGAGCATCGGAGAGTACGCGAAATCGACTGT +CTGGCCAACACTGGACAGACGCGCGAGACGCTGATAAACCCCCATTTTCGTGGGGTTAGT +AAATCGGTGGCTTCAAACGGTTCGATCGCTCGCACGAGCATTAGTCAGGAGTATGATTGC +GCATCATCTCTACTCCGTACTAGGTTCTGTAGCTCAGGATCTCCGGGGGGATCAAGCTTT +TATGTCCCGCGGGATTAGAAGATATCCATCCATATGTTTACTTTGATGCGGGTCCGAGCT +CTCTATGATGCTGCTTTGACAAAATGTGGAAAGCCTCAGCGACCGCTCTGGGGTCAAAGC +TCTACGACACGTTGATGCTAAACAAGTTGGACTACTAAGCAAATGAAGCGGAAGAGATAC +TGAGCGCTCTCTCTATGACTTTCGCTATATGGCGACGTGCGAAAAACGCGATTTGGCCGA 
+AATCTCAAATAATAATGCAATGTGTTGGCTAAGCTCGGGCAAACTCAGGATGTGACCTAG +TACCCTCTGCCCCGTGAGTACCCATACCCAATCAAGCCAGCCCAATTCGCGATAGGTACA +GATAAGCCTTACTCGTCCCGATCCTTGACTAAAACACTCATCCTCGATAGTACGTGCCTT +GTAGAGTCCGACGGCCATAGGTAAACGACCAGTTAGTGTCGTAGGGTCCTCCACTTTAGC +TACGATTGCAACTCCCTTACCTTCGTGCCGGCGCGCTCCTCTTGCTACCATCACGCTTCA +AATCGGGTGGAAGACCAACTGATGAATGAGGGTGCAGACCCCAGTGGAGCATAATACGAT +GCGCCCACGATACTCTAGAGCCACGCGTCGACGTTCGTAAGATGTCCCGGGCCCGTGGGT +GGCCACATCGGCCGCCCATCCTTGTGTCTTCCGATAAGTACTCCCTTTCAAGGACTCTCC +GATACCGGCTTAGAACACGAACGGACAACGCTTTCAGCGCTGAGAAATGACGGCTGCTTC +AACCAGAACGATACAGCCTT +>Frag_3 +AACCAGAACGATACAGCCTTCTTTGCTAGATAGCCTCTAGTGTCAGCCACCAAGTCCGCC +ACCTAATCCTTCGCCGAGGGTCAAAGCTTCTGACCATCCAATTGTTAGGCACTCTTGGAA +CCTTGCAAGTACCTGATCATGCAGAAAATGGACTTGCCCGGAGCATATTCATTATAAGTA +CCATTCACGGCAGGTTATAGTATGCGTTGACGCGGGGTGATACGGTAATGCAAGTAACCA +TCGGGCCGCGGTATGCACATAGAAGGGTGTTTGACTTGGTTTCTTACAGTGCAACTGCAT +ATGTGGACCGCCATTCCAAGTGAAAGGATACGAAGGGAGGTTTCGGACTGCTATCCAGAG +CATGGAAAGCCTTCAAGGTGGTGCGCACTGGAATAGTGACTCGGGGATTGTATGTCGAGC +CGCGTCCAGTCTTTCCTATACTTATGCGAGCAAATAGACCGAGTGGAACTTGATTGCATC +AGCCGAATCACGATGAAAACTCAGCTTACAATTCGTAAGCACTTCAAATATATGCCCGGA +AAATCGCCGTCCAGTGACTTGTAATAGCTCCAGGGTCAGTAGCGCTGCCCTGGCCGAACC +AGGTGACGGTTACGTGGACCTCACAGTATCAATGTTGCAGTGCGCCACCAGATTAAGCTG +GCCGTGTCATGGTGAACAACCGTGATAAAAGAACGGGAGTATGAGAGTGCCACCCAAGAG +TGGTACTCCAGGATTCGGAAAGGGGAGGGGGCTTTTCCTGCTTAACAACCTTCCTCGGCA +CGGTCTAAGCGTCTCACTCGTCCTAAACTCTACCATCATGCAGCTAGGCGAAAGTATAAC +TTTAGGGTCTATCAGGATGAGCTATCTGTACCACGGTGATGAGATAAGTAGGGGGTGCGC +TAGGGTCGTTCGTGTTTTCATCGTCTATAGTACCTATTTCTCGTCTTCCATTAGCGCATA +CGGCCATCTCGCGGCGGCTACGTCAGCCAGCGTCAACCCCAAGTCACTAATTTCCGTTAT +TATAGGTGTAGCTCATAACTTCTTCTGCCGACCAAGCCAAAAGCTTTGCGGCAGCTGCAA +GGCCACGTTAGGCTCTCGGACGGCAGATATTTATTAATAACACCACTGGGTTCCAGCTCT +TAACGCACTTCCCTTCACTGGATCACCTGTACGCCCATAAGCTAAGCCTGAAGCCACGGC +CTCAATCGTGGACATTACTCTTAGCAAATCCACATTACCATGTCACCAATCCCCGAACCC +CGCGCCGACCTAAGGCGGGGTACCTCGTGCACGGCAGCATCGAGGGAAATATATTACAAA 
+AGACGCGGCGATCCAACGGTTGATAAAAACAGTTCCGTGGAAGGCGGACGTGTGCTTAGC +GCATGCTTCTCACCGAGTGACAGCACCGATTCCATCTCGCTAGCACGCCGATGAGCCTCG +CTGTGGGACTATGGTCCAGCCTATGCTTAACTAGGCAGAACAGAGAGTATGGTGGTCCTC +GGTAAGGTCGATTTGGTGGCACTCGCAAGATACGCGGGTTATTAGTAGAGCGCAGTCAGG +GCCCATCGCCAGCATTTGCATGCTCGCCGTACGCTATCCTTGTACTACGGTTCCTCAGTG +GCTATAAAGGAATGTCACAACTCCATTAGCCCCTTTTGTTCAGCGCTTAGTCATGGTGGG +ACTGACTCACCGAGACTACATACGGATTGTGCCCGCAGAGACATGCCTTTCATGCTGGGG +TTTTGTAAGCAGCTCCACGGGGTAGTGTCACCATGCCTTCGACCTATGCTTATGAAACCC +GAGGTAATGCGGCAGGATC +>Frag_4 +AGGTAATGCGGCAGGATCCCGTGGATCCTACGCTTAGCCTGTTTAAGCCATCGTTCTCCT +CGGCCCCCAGCCAGCGCGTATTCCGCGCATTCCTCCGAGTGCGTATCGCGCAAACCACAA +GATACCTCTCAACGACTAGCGGTGAACCACCAAGCTTGACATACGTGCCCCTTCGGAAAC +GTTATTAGTGGTCCTGAGTGGTACGTCCCGAGTATACGGGGCAGCGATAGTAACGCTACA +AATAGCTAGGCGAGGTTGCGATACCTGAATGAATATCCAACAGTGGCAACGAGACGCTTA +GAAAGGGAAACTACATACCAGTTCAATCCGGGATGACTTCAAAAGGGTATGTTTGTTTTG +AAGCGAAGCTTTGGTTGTCATTCAGTTCCTAGAGTTTACGGCGCTGATATGGCTCTAGGA +GTCCAGTGAATTCGTCAGCATAAGCCCCACATCGTAGGAAATTGCCCGGTCGTCGGTCTA +GCAAATGCTCGGTAAACTGGGAGTGTACTAACACCATGTCGTGGTAACCCGGTGGACCGG +CTACTAGAGACATATTTATTCATGCTCAGTTGAACCTGTTTCGAGGGGGCCGAGTTAACA +TAAAGTGATTAGTCACAGCGCATTAAGATAGAACAGGGATATAGCTCGTTGCATCCATAA +ATTGCCCACCGTAGGGAGAACGCCGTTGGAAAATGAGAGTTAGGCGATGAGCGGTTGTCG +CCCGCACAGACAAAGGATGCCTTTATAAATAGTAGCGGCCTTGTCTGCACCAGACGTTTG +TGGGTCGACGTGCTATTCTAGCCAAAAGCAAAATGTATCATACTACAGGCAATAGGCCCT +ATGCTGACCAAACTTGGGGACCCTTGCGCCTTCTCTCTTACGCATAACATGCATAGGTAC +CTCTACTCATCAGGCAGGGCGGCGTGAGGTCACTATGGCTCAAGATGTGTACGACTAAAG +AAAGGTTTATGCTCCTTCCCCAAGGACGCATTTGGGACTGCTACTTGCCCCTAGCGAATT +CACTAGGATTTTTGTAGAACCATGAGCGCCCTATCCGATAGCACAGAGACAATGCTACAA +GCAACTGTGCATGCGCTCGATCGCCGTGCATTAATACGTATTATAGCGTATCGTGTACGC +TAATATCTTAGTGCACCGCACGCTGGTTGGATACAATTCCGTGAAATAATTCCTGCTTAC +ACAGGGGCTTTCTGGCGC +>Frag_4b +AGGTAATGCGGCAGGATCCCGTGGATCCTACGCTTAGCCTGTTACCCCCCCCGTTCTCCT +CGGCCCCCAGCCAGCGCGTATTCCGCGCATTCCTCCGAGTGCGTATCGCGCAAACCACAA +GATACCTCTCAACGACTAGCGGTGAACCACCAAGCTTGACATACGTGCCCCTTCGGAAAC 
+GTTATTAGTGGTCCTGAGTGGTACGTCCCGAGTATACGGGGCAGCGATAGTAACGCTACA +AATAGCTAGACCGACGACAAACACAGCACTGAATATCCAACAGTGGCAACGAGACGCTTA +GAAAGGGAAACTACATACCAGTTCAATCCGGGATGACTTCAAAAGGGTATGTTTGTTTTG +AAGCGAAGCTTTGGTTGTCATTCAGTTCCTAGAGTTTACGGCGCTGATATGGCTCTAGGA +GTCCAGTGAATTCGTCAGCATAAGCCCCACATCGTAGGAAATTGCCCGGTCGTCGGTCTA +GCAAATGCTCGGTAAACTGGGAGTGTACTAACACCATGTCGTGGTAACCCGGTGGACCGG +CTACTAGAGACATATTTATTCATGCTCAGTTGAACCTGTTTCGAGGGGGCCGAGTTAACA +TAAAGTGATTAGTCACAGCGCATTAAGATAGAACAGGGATATAGCTCGTTGCATCCATAA +ATTGCCCACCGTAGGGAGAACGCCGTTGGAAAATGAGAGTTAGGCGATGAGCGGTTGTCG +CCCGCACAGACAAAGGATGCCTTTATAAATAGTAGCGGCCTTGTCTGCACCAGACGTTTG +TGGGTCGACGTGCTATTCTAGCCAAAAGCAAAATGTATCATACTACAGGCAATAGGCCCT +ATGCTGACCAAACTTGGGGACCCTTGCGCCTTCTCTCTTACGCATAACATGCATAGGTAC +CTCTACTCATCAGGCAGGGCGGCGTGAGGTCACTATGGCTCAAGATGTGTACGACTAAAG +AAAGGTTTATGCTCCTTCCCCAAGGACGCATTTGGGACTGCTACTTGCCCCTAGCGAATT +CACTAGGATTTTTGTAGAACCATGAGCGCCCTATCCGATAGCACAGAGACAATGCTACAA +GCAACTGTGCATGCGCTCGATCGCCGTGCATTAATACGTATTATAGCGTATCGTGTACGC +TAATATCTTAGTGCACCGCACGCTGGTTGGATACAATTCCGTGAAATAATTCCTGCTTAC +ACAGGGGCTTTCTGGCGC +>Frag_4c +AGGTAATGCGGCAGGATCCCGTGGATCCTACGCTTAGCCTGTTAGGGGGGGGGTTCTCCT +CGGCCCCCAGCCAGCGCGTATTCCGCGCATTCCTCCGAGTGCGTATCGCGCAAACCACAA +GATACCTCTCAACGACTAGCGGTGAACCACCAAGCTTGACATACGTGCCCCTTCGGAAAC +GTTATTAGTGGTCCTGAGTGGTACGTCCCGAGTATACGGGGCAGCGATAGTAACGCTACA +AATAGCTAGACCGACGACAAACACAGCACTGAATATCCAACAGTGGCAACGAGACGCTTA +GAAAGGGAAACTACATACCAGTTCAATCCGGGATGACTTCAAAAGGGTATGTTTGTTTTG +AAGCGAAGCTTTGGTTGTCATTCAGTTCCTAGAGTTTACGGCGCTGATATGGCTCTAGGA +GTCCAGTGAATTCGTCAGCATAAGCCCCACATCGTAGGAAATTGCCCGGTCGTCGGTCTA +GCAAATGCTCGGTAAACTGGGAGTGTACTAACACCATGTCGTGGTAACCCGGTGGACCGG +CTACTAGAGACATATTTATTCATGCTCAGTTGAACCTGTTTCGAGGGGGCCGAGTTAACA +TAAAGTGATTAGTCACAGCGCATTAAGATAGAACAGGGATATAGCTCGTTGCATCCATAA +ATTGCCCACCGTAGGGAGAACGCCGTTGGAAAATGAGAGTTAGGCGATGAGCGGTTGTCG +CCCGCACAGACAAAGGATGCCTTTATAAATAGTAGCGGCCTTGTCTGCACCAGACGTTTG +TGGGTCGACGTGCTATTCTAGCCAAAAGCAAAATGTATCATACTACAGGCAATAGGCCCT 
+ATGCTGACCAAACTTGGGGACCCTTGCGCCTTCTCTCTTACGCATAACATGCATAGGTAC +CTCTACTCATCAGGCAGGGCGGCGTGAGGTCACTATGGCTCAAGATGTGTACGACTAAAG +AAAGGTTTATGCTCCTTCCCCAAGGACGCATTTGGGACTGCTACTTGCCCCTAGCGAATT +CACTAGGATTTTTGTAGAACCATGAGCGCCCTATCCGATAGCACAGAGACAATGCTACAA +GCAACTGTGCATGCGCTCGATCGCCGTGCATTAATACGTATTATAGCGTATCGTGTACGC +TAATATCTTAGTGCACCGCACGCTGGTTGGATACAATTCCGTGAAATAATTCCTGCTTAC +ACAGGGGCTTTCTGGCGC +>Frag_5 +ACAGGGGCTTTCTGGCGCATGGCTGTGTCTGGATGTTTGTGATAAGAGGCTCCATGAACC +CGGCGGGAAATGAGGGGAAACCCCGGGGAGCAACGACACTAAGCCTGGCAGTTGTTCAGA +TAGGACGCTTTTGTCAGTGGGCGGTTTTGCATCCACTAACTATCATAAACGACACAACGT +CGAATGATCATCGGCGGTTTGCATGTAGAGACAGCTCCTGCAACACCATACCGGCTCGTG +GAATTACGCTGTGCGCCGGCCCTCATTCGATTGTGTCCAGCGTCGGCGCGAATGATGATT +GAGATGTGCTTCGGCTCAGAGCGTGCACAGGTAGTTCTCCCCCCTTACCTAGCTGATCCA +GCAATTCCACGCTGGCCCGCGCGTCCTTGGGTAACACTGGATCTGTACCCAAATACACCG +TCGTGATTGTTAACTACCGTATGGCGGTTACGATCGAGTGACACTATTATTAGACCGCTA +CTCCAAGTCCCAACTTTTCATCGCCATACAGAAGAGAACGAGAAAGTCGAAGAGTTATAG +CGTGTACACTTCTGATTAGCTAACGAATCGATGCGTACAGCATACTCGAGCAACTTTATG +GGTCGTCTGTCTCCTTGTACCCCGCTTGACGAGGATGTGTTTGTAGGCGGCTTGGATATC +GGCAGTGTATCCGCATGGTCAAACACTTTAGCCCGACCGGTGCTCAAGGTCGAAATCCTA +ATATAGATTGGGATAGGACCCGTCCATTACTAGGCGACATTCGCAGTCACGAGCCCGCAC +GGGGTGCCTCAACACGGTGACACTCAATTCGCCGGCACTGAGTACGACGATCGTGATTGC +CATACCGTCATGTTTTGTGTTACCCCTGATAAGAAGCGCAGATATGGCACGGGGGCAGAG +GGCGGAGTAGACCCATGCAAAACTCTAGCCTCTGGGCAGCATGATCGACGTAGAACGCCG +TTGTGACTCACTAGTTTCAGTTATGATGTTCAGCCAAGAATCCTCCATTTTGACGTGAAC +CTTCTCCCTTTACACCTTGAAGCTGCAATATTAAGTCGACCCTTTAAGTTGCCTTGTCTT +GGTCTAGAGCCCGCAGCTCACCGATAAAATGAGCAGTCAATTTTAGCGGCGGTTGTCCAG +GTTACATGCACTTTGGGCTGGAACCGCGTTACGTTTGGAGTACTTAGAAGTGTCGTAGGC +ATAGGAGAGCCGAAACTTGATATTGGGTCTAGTTAGTGCTCTACCCCGCTTCGGATCCGT +AATAGTGATGTTGCATGAGACGCTTGCCTCAAGGCGACATACGGTGGCGGTTGCTGGAAA +TGACGCCCCACCCCGTGATAATTTGGGGACGAGCCGCACGATAACTTCTGCCAGCGACCG +CCCCTCCCACGTCCGATGGTGATTCCGAACTCCACCTGCGTCTATCGT +>Frag_5b +ACAGGGGCTTTCTGGCGCATGGCTGTGTCTGGAAAAAAAAAAAAAAAAAAAACATGAACC 
+CGGCGGGAAATGAGGGGAAACCCCGGGGAGCAACGACACTAAGCCTGGCAGTTGTTCAGA +TAGGACGCTTTTGTCAGTGGGCGGTTTTGCATCCACTAACTATCATAAACGACACAACGT +CGAATGATCATCGGCGGTTTGCATGTAGAGACAGCTCCTGCAACACCATACCGGCTCGTG +GAATTACGCTGTGCGCCGGCCCTCATTCGATTGTGTCCAGCGTCGGCGCGAATGATGATT +GAGATGTGCTTCGGCTCAGAGCGTGCACAGGTAGTTCTCCCCCCTTACCTAGCTGATCCA +GCAATTCCACGCTGGCCCGCGCGTCCTTGGGTAACACTGGATCTGTACCCAAATACACCG +TCGTGATTGTTAACTACCGTATGGCGGTTACGATCGAGTGACACTATTATTAGACCGCTA +CTCCAAGTCCCAACTTTTCATCGCCATACAGAAGAGAACGAGAAAGTCGAAGAGTTATAG +CGTGTACACTTCTGATTAGCTAACGAATCGATGCGTACAGCATACTCGAGCAACTTTATG +GGTCGTCTGTCTCCTTGTACCCCGCTTGACGAGGATGTGTTTGTAGGCGGCTTGGATATC +GGCAGTGTATCCGCATGGTCAAACACTTTAGCCCGACCGGTGCTCAAGGTCGAAATCCTA +ATATAGATTGGGATAGGACCCGTCCATTACTAGGCGACATTCGCAGTCACGAGCCCGCAC +GGGGTGCCTCAACACGGTGACACTCAATTCGCCGGCACTGAGTACGACGATCGTGATTGC +CATACCGTCATGTTTTGTGTTACCCCTGATAAGAAGCGCAGATATGGCACGGGGGCAGAG +GGCGGAGTAGACCCATGCAAAACTCTAGCCTCTGGGCAGCATGATCGACGTAGAACGCCG +TTGTGACTCACTAGTTTCAGTTATGATGTTCAGCCAAGAATCCTCCATTTTGACGTGAAC +CTTCTCCCTTTACACCTTGAAGCTGCAATATTAAGTCGACCCTTTAAGTTGCCTTGTCTT +GGTCTAGAGCCCGCAGCTCACCGATAAAATGAGCAGTCAATTTTAGCGGCGGTTGTCCAG +GTTACATGCACTTTGGGCTGGAACCGCGTTACGTTTGGAGTACTTAGAAGTGTCGTAGGC +ATAGGAGAGCCGAAACTTGATATTGGGTCTAGTTAGTGCTCTACCCCGCTTCGGATCCGT +AATAGTGATGTTGCATGAGACGCTTGCCTCAAGGCGACATACGGTGGCGGTTGCTGGAAA +TGACGCCCCACCCCGTGATAATTTGGGGACGAGCCGCACGATAACTTCTGCCAGCGACCG +CCCCTCCCACGTCCGATGGTGATTCCGAACTCCACCTGCGTCTATCGT \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 test-data/example_genetic_parts_and_backbone/parts/partA.gb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/example_genetic_parts_and_backbone/parts/partA.gb Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,94 @@ +LOCUS Exported 4022 bp ds-DNA circular UNK 01-JAN-1980 +DEFINITION natural circular DNA +ACCESSION RFP_GFP_BO_10232 +TGGCTCGAGTTTTTCAGCAAGATTCCCTATCAGTGATAGAGATTGACATCC + +> RFP_GFP_BO_10233 +CGGATCTGAAAGAGGAGAAAGGATCTATGGCGAGTAGCGAAGACGT + +> RFP_GFP_BO_10234 
+GTGTTGTTACCGTTACCCAGGACTCCTCCCTGCAAGACGGTG + +> RFP_GFP_BO_10235 +GGCCGCGTGTTACAACCAATGAAAGTGAAACGTGATTTCATGCGT + +> RFP_GFP_BO_10236 +AATGGTAAACTGACGCTGAAGTTCATCTGTACTACTGGTAAACTGCCGG + +> RFP_GFP_BO_10237 +CATAAGCTGGAATACAATTTTAACAGCCACAATGTTTACATCACCGCCGATAAACAAAAAAA + +> RFP_GFP_BO_10238 +CGGGTGGGCCTTTCTGCGTTTATAATCTTTCTAGAAGATCTCCTACAATATTCTCAGC + +> RFP_GFP_BO_10244 +GGGTGGGCCTTTCTGCGTTTATAATCTTTCTAGAAGATCTCCTACAATATTCTCAGC + +> RFP_GFP_BO_10268 +GGCCGCGTGTTACAACCAATGAAAGTGAAACGTGATTTCATGCGTC + +> RFP_GFP_BO_10281 +AGCTGGAATACAATTTTAACAGCCACAATGTTTACATCACCGCCGATAAACAA \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 test-data/lcr_assembly_dataset/RFP_GFP_plasmid_parts.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lcr_assembly_dataset/RFP_GFP_plasmid_parts.fa Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,93 @@ +> p1_mRFP1-part1 +TCCCTATCAGTGATAGAGATTGACATCCCTATCAGTGATAGAGATACTGAGCACGGATCTGAAAGAGGAGAAAGGATCT + +> p2_mRFP1-part2 +ATGGCGAGTAGCGAAGACGTTATCAAAGAGTTCATGCGTTTCAAAGTTCGTATGGAAGGTTCC +GTTAACGGTCACGAGTTCGAAATCGAAGGTGAAGGTGAAGGTCGTCCGTACGAAGGTACTCA +GACCGCTAAACTGAAAGTTACCAAAGGTGGTCCGCTGCCGTTCGCTTGGGACATCCTGTCCCC +GCAGTTCCAGTACGGTTCCAAAGCTTACGTTAAACACCCGGCTGACATCCCGGACTACCTGAA +ACTGTCCTTCCCGGAAGGTTTCAAATGGGAACGTGTTATGAACTTCGAAGACGGTGGTGTTGTT +ACCGTTACCCAGGAC + +> p3_mRFP1-part3 +TCCTCCCTGCAAGACGGTGAGTTCATCTACAAAGTTAAACTGCGTGGTACTAACTTCCCGTCCG +ACGGTCCGGTTATGCAGAAAAAAACCATGGGTTGGGAAGCTTCCACCGAACGTATGTACCCGG +AAGACGGTGCTCTGAAAGGTGAAATCAAAATGCGTCTGAAACTGAAAGACGGTGGTCACTAC +GACGCTGAAGTTAAAACCACCTACATGGCTAAAAAACCGGTTCAGCTGCCGGGTGCTTACAAA +ACCGACATCAAACTGGACATCACCTCCCACAACGAAGACTACACCATCGTTGAACAGTACGAA +CGTGCTGAAGGTCGTCACTCCACCGGTGCTTAATAAGGATCTCCAGGCATCAAATAAAACGAA +AGGCTCAGTCGAAAGACTGGGCCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTA +GAGTCACACTGGCTCACCTTCGGGTGGGCCTTTCTGCGTTTATAAGTCGGTCTCACCGAGCGGC +CGCGTGTTACAACCAAT + +> p4_sfGFP-part1 +GAAAGTGAAACGTGATTTCATGCGTCATTTTGAACATTTTGTAAATCTTATTTAATAATGTGTG 
+CGGCAATTCACATTTAATTTATGAATGTTTTCTTAACATCGCGGCAACTCAAGAAACGGCAGGT +TCGGATCTTAGCTACTAGAGAAAGAGGAGAAATACTAGATGCGTAAAGGCGAAGAGCTGTTC +ACTGGTGTCGTCCCTATTCTGGTGGAACTGGATGGTGATGTCAACGGTCATAAGTTTTCCGTGC +GTGGCGAGGGTGAAGGTGACGCAACTAATGGTAAACTGACGCTGAAGTTCA + +> p5_sfGFP-part2 +TCTGTACTACTGGTAAACTGCCGGTTCCTTGGCCGACTCTGGTAACGACGCTGACTTATGGTGT +TCAGTGCTTTGCTCGTTATCCGGACCATATGAAGCAGCATGACTTCTTCAAGTCCGCCATGCCG +GAAGGCTATGTGCAGGAACGCACGATTTCCTTTAAGGATGACGGCACGTACAAAACGCGTGCG +GAAGTGAAATTTGAAGGCGATACCCTGGTAAACCGCATTGAGCTGAAAGGCATTGACTTTAAA +GAGGACGGCAATATCCTGGGCCATAAGCTGGAATACAATTTTAACAGCCACA + +> p6_sfGFP-part3 +ATGTTTACATCACCGCCGATAAACAAAAAAATGGCATTAAAGCGAATTTTAAAATTCGCCACA +ACGTGGAGGATGGCAGCGTGCAGCTGGCTGATCACTACCAGCAAAACACTCCAATCGGTGATG +GTCCTGTTCTGCTGCCAGACAATCACTATCTGAGCACGCAAAGCGTTCTGTCTAAAGATCCGA +ACGAGAAACGCGATCATATGGTTCTGCTGGAGTTCGTAACCGCAGCGGGCATCACGCATGGTA +TGGATGAACTGTACAAATGACCAGGCATCAAATAAAACGAAAGGCTCAGTCGAAAGACTGGG +CCTTTCGTTTTATCTGTTGTTTGTCGGTGAACGCTCTCTACTAGAGTCACACTGGCTCACCTTCG +GGTGGGCCTTTCTGCGTTTATA + +> p7_backbone +ATCTTTCTAGAAGATCTCCTACAATATTCTCAGCTGCCATGGAAAATCGATGTTCTTCTTTTATT +CTCTCAAGATTTTCAGGCTGTATATTAAAACTTATATTAAGAACTATGCTAACCACCTCATCAG +GAACCGTTGTAGGTGGCGTGGGTTTTCTTGGCAATCGACTCTCATGAAAACTACGAGCTAAAT +ATTCAATATGTTCCTCTTGACCAACTTTATTCTGCATTTTTTTTGAACGAGGTTTAGAGCAAGCT +TCAGGAAACTGAGACAGGAATTTTATTAAAAATTTAAATTTTGAAGAAAGTTCAGGGTTAATA +GCATCCATTTTTTGCTTTGCAAGTTCCTCAGCATTCTTAACAAAAGACGTCTCTTTTGACATGTT +TAAAGTTTAAACCTCCTGTGTGAAATTATTATCCGCTCATAATTCCACACATTATACGAGCCGG +AAGCATAAAGTGTAAAGCCTGGGGTGCCTAATGAGTGAGCTAACTCACATTAATTGCGTTGCG +CTCACTGCCAATTGCTTTCCAGTCGGGAAACCTGTCGTGCCAGCTGCATTAATGAATCGGCCAA +CGCGCGGGGAGAGGCGGTTTGCGTATTGGGCGCTCTTCCGCTTCCTCGCTCACTGACTCGCTGC +GCTCGGTCGTTCGGCTGCGGCGAGCGGTATCAGCTCACTCAAAGGCGGTAATACGGTTATCCA +CAGAATCAGGGGATAACGCAGGAAAGAACATGTGAGCAAAAGGCCAGCAAAAGGCCAGGAA +CCGTAAAAAGGCCGCGTTGCTGGCGTTTTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAA +AAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTC 
+CCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGC +CTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTG +TAGGTCGTTCGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCT +TATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAG +CCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGT +GGCCTAACTACGGCTACACTAGAAGGACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTA +CCTTCGGAAAAAGAGTTGGTAGCTCTTGATCCGGCAAACAAACCACCGCTGGTAGCGGTGGTT +TTTTTGTTTGCAAGCAGCAGATTACGCGCAGAAAAAAAGGATCTCAAGAAGATCCTTTGATCT +TTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGAT +TATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAG +TATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCG +ATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGG +AGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAG +ATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTAT +CCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAG +TTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCT +TCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAA +GCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCCGCAGTGTTATCACTCA +TGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGAC +TGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCC +GGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAA +ACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCC +ACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAA +CAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATA +CTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATT +TGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACATTTCCCCGAAAAGTGCCACC +TGACGTCTAAGAAACCATTATTATCATGACATTAACCTATAAAAATAGGCGTATCACGAGGCC +GCCCCTGCAGCCGAATTATATTATTTTTGCCAAATAATTTTTAACAAAAGCTCTGAAGTCTTCT +TCATTTAAATTCTTAGATGATACTTCATCTGGAAAATTGTCCCAATTAGTAGCATCACGCTGTG +AGTAAGTTCTAAACCATTTTTTTATTGTTGTATTATCTCTAATCTTACTACTCGATGAGTTTTCG 
+GTATTATCTCTATTTTTAACTTGGAGCAGGTTCCATTCATTGTTTTTTTCATCATAGTGAATAAA +ATCAACTGCTTTAACACTTGTGCCTGAACACCATATCCATCCGGCGTAATACGACTCACTATAG +GGAGAGCGGCCGCCAGATCTTCCGGATGGCTCGAGTTTTTCAGCAAGAT \ No newline at end of file diff -r 000000000000 -r dc450979fcd4 test-data/lcr_assembly_dataset/assembly_plan.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/lcr_assembly_dataset/assembly_plan.csv Fri May 16 09:39:03 2025 +0000 @@ -0,0 +1,2 @@ +Construct1,RFP_GFP_BO_10232,oligo_indicator: _BO_,RFP_GFP_BO_10234,RFP_GFP_BO_10236,RFP_GFP_BO_10244,RFP_GFP_BO_10268,RFP_GFP_BO_10281,p1_mRFP1-part1,p2_mRFP1-part2,p3_mRFP1-part3,p4_sfGFP-part1,p5_sfGFP-part2,p6_sfGFP-part3,p7_backbone +Construct2_flawed,RFP_GFP_BO_10232,RFP_GFP_BO_10233,RFP_GFP_BO_10244,RFP_GFP_BO_10268,RFP_GFP_BO_10281,p1_mRFP1-part1,p2_mRFP1-part2,p3_mRFP1-part3,p4_sfGFP-part1,p5_sfGFP-part2,p6_sfGFP-part3,p7_backbone,,