# HG changeset patch
# User imgteam
# Date 1766156569 0
# Node ID 9fee6d81910d194d58ffb631d5a627233c888620
planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/iscc-sum commit 6db86b8b65a0e05b7f3541d505fbe900633fc72a
diff -r 000000000000 -r 9fee6d81910d creators.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/creators.xml Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,21 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 9fee6d81910d iscc_similarity_parse_output.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iscc_similarity_parse_output.py Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,158 @@
+#!/usr/bin/env python
+"""
+Parse ISCC similarity output into tabular format with unique identifiers.
+
+Input format (from iscc-sum --similar):
+ ISCC:K4AOMG... *file1.txt
+ ~08 ISCC:K4AOMG... *file2.txt
+ ~10 ISCC:K4AOMG... *file3.txt
+ ISCC:K4AGSPO... *file4.txt
+
+Output format (tabular with 7 columns, bidirectional):
+ file_id filename iscc_code match_id match_filename match_iscc_hash distance
+ 23 file1.txt K4AOMG... 24 file2.txt K4AOMG... 8
+ 24 file2.txt K4AOMG... 23 file1.txt K4AOMG... 8
+ 25 file4.txt K4AGSPO... -1
+"""
+import argparse
+
+
+def clean_filename(filename):
+ """Remove directory prefix from filename."""
+ # Remove 'input_files/' prefix if present
+ if filename.startswith('input_files/'):
+ filename = filename[len('input_files/'):]
+
+ return filename
+
+
+def load_id_mapping(mapping_file):
+ """Load filename to element_identifier mapping.
+
+ Returns: dict mapping cleaned filename -> element_identifier
+ """
+ mapping = {}
+ with open(mapping_file, 'r') as f:
+ for line in f:
+ parts = line.strip().split('\t')
+ if len(parts) == 2:
+ filename, element_id = parts
+ # Clean the filename the same way as in parse
+ cleaned = clean_filename(filename)
+ mapping[cleaned] = element_id
+ return mapping
+
+
+def parse_iscc_line(line):
+ """Parse ISCC line and extract code and filename.
+
+ Format: "ISCC:CODE *filename" or " ~NN ISCC:CODE *filename"
+ Returns: (code, filename) or (None, None) if parse fails
+ """
+ # Find the * separator
+ if ' *' not in line:
+ return None, None
+
+ # Split on ' *' to get code part and filename
+ parts = line.split(' *', 1)
+ code_part = parts[0].strip()
+ filename = clean_filename(parts[1].strip())
+
+ # Extract CODE (after 'ISCC:')
+ if 'ISCC:' in code_part:
+ code = code_part.split('ISCC:', 1)[1].strip()
+ else:
+ code = ''
+
+ return code, filename
+
+
+def main():
+ parser = argparse.ArgumentParser(
+ description='Parse ISCC similarity output into tabular format'
+ )
+ parser.add_argument(
+ 'similarity_raw',
+ help='Raw similarity output from iscc-sum --similar'
+ )
+ parser.add_argument(
+ 'id_mapping',
+ help='TSV file mapping filenames to element identifiers'
+ )
+ parser.add_argument(
+ 'output_file',
+ help='Tabular output file'
+ )
+ args = parser.parse_args()
+
+ # Load ID mapping
+ id_map = load_id_mapping(args.id_mapping)
+
+ # Parse similarity output
+ file_codes = {} # filename -> code mapping
+ matches = [] # List of (file1, code1, file2, code2, distance)
+ current_ref = None
+ current_code = None
+
+ with open(args.similarity_raw, 'r') as f:
+ for line in f:
+ line = line.rstrip()
+ if not line:
+ continue
+
+ if line.startswith('ISCC:'):
+ # Reference file: "ISCC:CODE *filename"
+ code, filename = parse_iscc_line(line)
+ if code and filename:
+ current_ref = filename
+ current_code = code
+ file_codes[filename] = code
+
+ elif line.startswith(' ') and current_ref:
+ # Similar file: " ~NN ISCC:CODE *filename"
+ parts = line.strip().split(None, 1) # Split on first whitespace
+ if len(parts) == 2:
+ dist_str = parts[0].replace('~', '')
+ distance = int(dist_str)
+
+ # Parse the rest of the line for ISCC and filename
+ code, filename = parse_iscc_line(parts[1])
+
+ if code and filename:
+ matches.append((current_ref, current_code, filename, code, distance))
+ file_codes[filename] = code
+ # Write output with identifiers
+ with open(args.output_file, 'w') as out:
+ # Write header (7 columns)
+ out.write("file_id\tfilename\tiscc_code\tmatch_id\tmatch_filename\tmatch_iscc_code\tdistance\n")
+
+ # Track which files have matches
+ files_with_matches = set()
+
+ # Write similarity matches in both directions
+ for file1, code1, file2, code2, distance in matches:
+ # Get element identifiers
+ file1_name = id_map[file1]
+ file2_name = id_map[file2]
+ file1_id = str.split(file1, '_', 1)[0] # Extract ID from filename
+ file2_id = str.split(file2, '_', 1)[0] # Extract ID from filename
+
+ # Write A -> B (file_id is the numeric ID, filename is the element_identifier)
+ out.write(f"{file1_id}\t{file1_name}\t{code1}\t{file2_id}\t{file2_name}\t{code2}\t{distance}\n")
+ # Write B -> A (bidirectional)
+ out.write(f"{file2_id}\t{file2_name}\t{code2}\t{file1_id}\t{file1_name}\t{code1}\t{distance}\n")
+
+ files_with_matches.add(file1)
+ files_with_matches.add(file2)
+
+ # Write files with no matches (distance = -1, empty match columns)
+ for filename in sorted(file_codes.keys()):
+ if filename not in files_with_matches:
+ file_id = str.split(filename, '_', 1)[0] # Extract ID from filename
+ element_name = id_map[filename]
+ code_val = file_codes[filename]
+ out.write(f"{file_id}\t{element_name}\t{code_val}\t\t\t\t-1\n")
+
+
+if __name__ == '__main__':
+ main()
diff -r 000000000000 -r 9fee6d81910d iscc_verify.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iscc_verify.xml Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,197 @@
+
+ with ISCC-SUM
+
+ macros.xml
+ creators.xml
+
+
+
+
+
+
+
+
+
+
+ &2;
+ echo "Found: \${#EXPECTED} characters" >&2;
+ exit 1;
+ fi &&
+
+ ## Output verification report
+ if [ "\$GENERATED" = "\$EXPECTED" ]; then
+ echo "OK - ISCC-CODEs match" > '${output_file}';
+ else
+ echo "FAILED - ISCC-CODEs do not match" > '${output_file}';
+ fi &&
+ echo "Expected: \$EXPECTED" >> '${output_file}' &&
+ echo "Generated: \$GENERATED" >> '${output_file}' &&
+ echo "" >> '${output_file}'
+
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 9fee6d81910d macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,24 @@
+
+ 0.1.0
+ 1
+
+
+
+ @misc{iscc_sum,
+ title={ISCC - Similarity Hash for Digital Media},
+ url={https://iscc.codes/},
+ note={Accessed: 2025}
+ }
+
+
+
+
+
+ iscc-sum
+
+
+
+ iscc-sum --version
+
+
+
diff -r 000000000000 -r 9fee6d81910d test-data/sequence1.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence1.txt Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,1 @@
+CTCACTTGTGGATACCCCGACCTATTTTGACGGGACCACTCGCGGTAGTCGTTGGGCTTATGCACCGTAAAGTCCTCCGCCGGCCTCCCCCCTACAAAAGATGATAAGCTCCGGCAAGCAATATTGAACAACGCAAGGATCGGCGATATAAACAGAGAAACGGCTGATTACTCTTGTTGGTGTGGTATCGCTAAACTGCGTCGCGGAGCCTTATGGCATAGTCGTCCGCGGAGCACTCTGGTAACGCTTATGGTCCATAGCACATTCATCGCATCCGGGCATGCGCTCTATTTGACGATCCCTTGGCGCAGAGATGCTGGCCACGAGCTAAATTAAAGCGACTGCACTACTGTAAGGTCCGTCACGCAGACGACGGCCCGGGGAGAGCACTAACCCATCAACCTGTACGGGAACTTTCTATATCGTTCTCGGACGGAGAGATAACTACAGTGCCGCTTACAGCCCCTCTGTCGTCGCCGACGTCTGTAATATAGCCTTGTTGTGATTCCACCCTATTGAGGCATTGACTGATGCGGAAGGAGATCTGGAATGAACTGGTCTATGTGACAGAAACTGTGCAAGTACCTAATCTCGTTAGTGTAGGTTCTGACCGATACGTGCTTCGTTGAGAACTCACAATTTTACAACTGGGGACATAAGCCCTACGCCCATCATCTACTGACGTCCCTGAGGCTGCAGTTCATGTAATGGGACAGTATCCGCCGCAAGTTCTAGTGCAATGGCGGTATAGTACGCTCGTACTGTAGTAGAGGCGACACGGGTGGGATCATCACTAATAAGGATACTGGGAAGACTCACAGGCCTCCGCCTATAGGCGGTGCTTACTCTTACATAAAGCGGCTGTTAGTATTACCCCGCGAGGATTCGAAAAGGTGAACCAACCCGGTCGATCCGGAGGGACGGGCCTCAAAGCCGCGTGACGACGGCTGTCGGCCCGTAACAGAATCCCCGCAATAAGCTCCCGTGAGCCTCGATTGAACAGCCCTGGTGGGCCCCATCAGTAGCCCGAATATGTCGCTTTTCGGGTCCTGGGCCGAGGAGCGATACCTTCCAGTAATCGAGGCCGTTCGTTAATTCTTGTTGCGTTCCTAGCGCCTATATTTGTCTCTTTGCCGGCTTATGTGGACAAGCATAGCATAGCCATTTATCGGAGCGCCTCGGTACACGGTATGACCAGACGCCTCGTGAGACCATTACGTATACCAGGTGTCCTGTGAGCAGCGAAGGCCCATACGCGAGATACACTGCCAGAAATCCGCGTGATTACGAGTCGTGGTAAATTTAATCTGGCTGTGGTCTAGACATTCCAGGCGGTGCGTCTGCTGTCGGGTGCCTCTGGTGACTGGCTAGATGGACTTGCCGCTGGTAAACACACCATGACCCCGCCTCTCCATTGATGCCACGGCGAATGTCGGGGAGACAGCAGCGACTGCAGACATCAGATCAGAGTAATACTAACATGCGATAAGTCCCTAACTGACTATGGCCTTCTGTAGAGTCTACTTCACCAGATATGCTGTCTCTGGCACGTGGATGGTTTAGAGGAATCACATTCAAGTCTGGTTAACCATGAAACAAGTCTTGAGTGTAAAATTGTCGTCTCCTGTGTACGAGATGGAGGTACTAGATGACTGCAGGGACTCCGACGTTATGTACGTTGCTCCGTCAAAGGCGCCATTCAGGATCACGTTACCGCCAAAAAATGGGAGCAGGAGCTCTTCTCCCCTGCGGTCACGTCTATAGAAATTACACCTTTAACCCTCCTGAGAACCGGGAGGCGGGAATCCGTCACGTATGAGAAGGTATTTGCCCGACAATCAATACCGGACGCTCCTAAGTTTTTCCACTCGCTTGAGCCGGCTAGGCCTCTCTGCCCGAAGTTTCGACGGACTGCTGCCAACGCCCAGGCATAGTTTTAGGAGGATTATTCGGGGGCACTGGCAACCAACTTCTCGGGTCCTGCCCGACTGGTCTTCGGGCTAATATAGCGAATTGCCGAGAACCCGGCCCCACGCAATGGAACGTCCTTAGCTCCGGCAGGCAATTAAGGGGAACGTAAGTATAGCGCAAAAAAACAGAGAAATAGGCGAATGAATCTATTCTTACTGTATCGAAGAATGGCCTCGCGGAGGCATGTGTCATGCTAGCGTGCGGGGTACTCTAGTTATCCATATGGTCCACAGGACACTCGTTGCTTTCGGATTTGCCCTTTATGCGCCGGTTTTCAGCCACGCTTATGCTCAGCATCGTTATAACCAGACCGATACTAGATCTATAAAGTCCGCCATGCAGACGAGACCAGACGGAGATTACCGAGCAATCTATCAGATCGGCGACCATTAGTGAGCTACTGGAGCCGAGGGGTAACTACGATGCCGCTAAGAACCTCTCGGTCGTCGCTAGCGATTACACTCCAGTCTCATTATAATCGTTCGCTATTCAGGGATTGACCAACACCGGAAAACATTTCACTTGAAGTATTGTATACGACAGAGTCCGTGCACCTACCAAACCTGTTTAATCTAAGTTCAGACTAGTTGGAAGTATGTCTAGATCTCAGATTTCGTCACTAGAGGGCCCACGCTCTATTTTTATGATCCATTGATCTCCCTGACGCTGCAAGATTTGCAACCAGGCAGACTTGGCGGTAGGTCCTAGTGCAGCGGGGCTTTTTTTCTATAGTCCTTGAGAGGAGGAGACGTCAGTCCAGATATCTTTGATGTCCTGATTGGAAGGACCGTTGGCCCCCCACCCTTAGGCAGTGTATACTCTTCCATAAACGAGCTATTAGTTATGAGGTCCGTAGATTGAAAAGGGTGACGGAATTCGGCCGAACGGGAAAGACGGACATCTAGGTATCCTGAGCACGGTTGCGCGTCCGTATCAAGCTCCTCTTTATAGGCCCCGGTTACTGTTGGTCGTAGAGCCCAGAACGGGTTGGGCAGATGTACGACAATATCGCTTAGTCACCCTTGGGCCACGGTCCGCTACCTTACAGGAATTGAGACCGTCCATTAATTTCCCTTGCATATATATTGCGTTTCTTCGACCTTTTAACCGCTCTCTTAGAAGAGAGACAGATAGCTTCTTACCCGTGCCCCACCGTTGGCAGTACGATCGCACGCCCCACGTGAACGATTGGTAAACCCTGTGGCCTGTGAGCGACAAAAGCTTTAATGGGAAATACGCGCCCATAACTTGGTGCGAATACGGGTCGTAGCAATGTTCGTCTGAGTATGATCTATATAATACGGGCGGTACGTCTGCTTTGGTCAGCCTCTAATGGCTCGTATGATAGTGCAGCCGCTGGTGATCACTCAATGATCTCGGCTCCCCGTTGCAACTACGGGGATTCTTGGAGAGCCAGCTGCGTTCGGTATTGTGAGGACAGTGTAGTATTAGCAAACGATAAGTCCCGAACTAGTTGTGACCTAACGAAAAGAGAATTTCATAATACGTGCTGTCCCACGCGCATGGTACATTTGGACAATATTGAATGGAGTCTGATCAACCTTCACACCGATCTAGAATCGAATGCAAAGATCACCCAGGTGCAAATCAAAAATTCTAGGTAACTAGAAGATTTGCGACGTTCTAAGTGTTGGACGATATGAATCGCGACCCAGGATGACGTCGCCCTGAAAAAAAGATTTCTGCAACTCTCCTCGTCAGCAGTCTGGTGTATCGAAAGTACAGGACTAGCCTTCCTAGCAACCGCGGGCTGGGAATCTGAGACATGAGTCAAGATATTTGCTCGGTAACGTATGCTCTAGGCATCTAACTATTCCCTGTGTCTTATAGGGGCCTGCGTTATCTGCCTGTCGAACCATAGGATTCGTGTCAGCGCGCAGGCTTGGATCGAGATGAAATCTCCGGGGCCTAAGACTACGAGCATCTGGCGTCTTGGCTAACCCCCCTACATGTTGTTATAAACAATCAGTGGAAACCCAGTGCTAGAGGATGGAATGACCTTAAATCAGGGACGATATTAAACGGAACGTATATTCAACGCAATGAAGCCGGAGGATTGGCGTGGGAATCGTGCTTCTGTCTAAGCAAGTAAGGGTATGAGGTCGCAACCGTCCCCCAAGCGTACAGGGTGCACTTTGTAACGATTTGGGAGTCCAGAGACTCGCTGTTTTCGAAATTTGCCCTCAAGCGCGAGTATTGAACCAGGCTTACGCCCAAGAACGTAGCAAGCTGACTCAAACAAAATACATTTTGCCCGCGTTACATATGAATCAAGTTGGAAGTTATGGAGCATAGTAACATGTGGACGGCCAGTGGTGGGTTGCTACACCCCTGCGGCAACGTTGAAGCTCCTGGATTACACTGGCTGGATCTAAGCCGTGACACCCGTCATACTCCATAACCGTCTGTAACTCACGGCTTGTTCTGGACTGGATTGCCATTCTCTCAGAGTATTATGCAGGCCGGCGTACGGGTCCCATATAAACCTGTCATAGCTTACCTGACTCTACTTGGAAATGTGGCTAGGTCTTTGCCCACGCACCTAATCGGTCCTCGTTTGCTTTTTAGGACCCGATGAACTACAGAACACTGCAAGAATCTCTACCTGCTTTACAAAGTGCTGGATCCTATTCCAGCGGGATCTTTTATCTAAACACGATGAGAGGAGTATTCGTCAGGCCACATAGCTTTCTTGTTCTGATCGGAACGATCGTTGGCGCCCGACCCCCCGATTCCATAGTGAGTTCTTCGTCCGAGCCATTGTATGCGAGATCGATAGACTGATAGGGGATGCAGTATATCCCTGGATACAATAGACGCACAGGTTGGAATCCTAAGTGAAGTCGCGCGTCCGAACCCAGCTCTATTTTAGAGGTCATGGGTTCTGGTGCCCGCGAGCCGCGGAACCGATTAGGGGCATGTACAACAATATTTATTAGTCATCTTTCAGACACAATCTCCCAGCTCACTGGTATATAGTTCCTGCTATAATTAGCCTCCCTCATAAGTTGCACTACTTCAGCGTCCCAAATGCACCCTTACCACGAAGACAGGATTGTCCGATCCCATATTACGACCTTGGCAGGGGGTTCGCAAGTCCCACCCCAAACGATGCTGAAGGCTCAGGTTTCACAGGGACAAAAGCTTTAAACGCGAGTTCCCGCTCATAACCTGGACCGAATGCAGAATCATGCATCGTTCCACTGTGTTCGTGTCATCTAGGACGGGCGCAAAGGATATATAATTCAATTTTGAATACCTTATATTATTGTACACCTACCGGTCACCAGCCAACAATGTGCGGATGGCGTTACAACTTTCTGGGCCTAATCTGACCGTTCTAGATACCGCACTCTGGGCAATACGAGGTAAAGCCAGTCACCCAGTGTCGATCAACACCTAACCTAACGGTAAGAGGCTCACATAATGGCACTGTCGGCGTCCCCAGGGTATTTTACGTTAGCATCAGGTGGACTAACATGAATCTTTACTCCCAAGCGAAAACGGGTGCGTGGACTAGCGAGGAGCAAACGAAAATTCTTGGCCTGCTTGGTGTCTCGTATTCCTCTTAGAGATCGACGAAATGTTTCACGACCAAGGGAAAGGTCGCCCTACAAAATAGATTTGCGTTACTCTCTCCATAAGGAGTCCGGTGTAGCGAAGGATCAAGGCGACCCTAGGTAGCAACCGCCGGCTTCGGCGGTAAGGTATCACTCAAGAAGCAGACACAGTAAGACACGGTCTAGCTGACTGTCTATCGGCTAGGTCAAATAGAGAGCTTTGATATCTGCATGTCTAGCTTTAGAATTCAGTTTAGCGCGCTGATCTGAGTCGAGATAAAATCACCAGTACCCAAGACCAGGGGGGCTCGCCACGTTGGCTAATCCTGGTACATCTTGTAATCAATATTCAGTAGAAAATTTGTGTTAGAAGGACGAGTCACCATGTACCAATAGCGATAACGATCGGTCGGACTATTCATTGTGGTGATGACGCTGGGTTTACGTGGGAAAGGTGCTTGTGTCCCGACAGGCTAGGATATAATGCTGAGGCCCTTCCCCAAGCGTTCAGCGTGGGATTTGCTACAACTTCCGAGTCCTACATGTGCGTGTTCATGTTATGTATGCACAAGGCCGAGAATAGGACGTAGCCTTCGAGTTAGTACGTAGCGTGGTCGCACAAGCACAGTAGATCCTCCCCGCGCATCCTATTTATTAAGTTAATTCTAAAGCAATACGATCACATGTGGATGGGCAGTGGCCGGTTGTTACACGCCTACCGCGGTGCTGAATGACCGGGACTAAAGAGGCGAAGATTATGGCGTGTGACCCGTTATGCTCGAGTTCGGTCAGTGCGTCATTGCAAGTAGTCGATTGCTTTCTCAATCTCCGAGCGATTTAGCGTGACAGCCCCAGGGAACCCATAAAATGTGATCGCAGTCCATCCGATCGTACATAGAAAGGAAGGTCCCCATACGCCCACGCACCTGTTTACTCGTCGTATGCATAAAAGAGCCGCACGAACCACAGAGCATAAAGAGGACCTCTAGTTCCTTTACAAAGTACAGGTTCGCTTTTCGGCGAGATGCCTTACCTAGATGCAATGACGGACGTATTCCTCTGGCCACATCGGTTCCTGCTTTCGCTGGGATCCAAGATTGGCAGCTGAAGCCGCCTTTCCATAGTGAGTCCTTCGTCTGTGACTAACTGTGCCAAATCGTCTAGCAAACTGCTGATCCAGTTTAACTCACCAAATTATAGCCGTACAGACCGAAATCTTAAGTCATATCACGCGACTAGCCTCTGCTTAATTTTTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGGTGCAGCCGATTAGGACCATGTAATACATTTGTTACAAGACTTCTTTTAAACACTTTCTTCCTGCCCAGTAGCGGATGATAATCGTTGTTGCCAGCCGGCGTGGAAGGTAACAGCACCGGTGCGAGCCTAATGTGCCGTCTCCACGAACACAAGGCTGTCCGATCGTATAATAGGATTCCGCAATGGGGTTAGCAAGTGGCAGCCTAAACGATATCGGGGACTTGCGATGTACATGCTTTGGTACAATACATACGTGATCCAGTTGTTATCCTGCATCGGAACATCAATTGTGCATCGGACCAGCATATTCATGTCATCTAGGAGGCGCGCGTAGGATAAATAATTCAATTAAGATGTCGTTTTGCTAGTATACGTCTAGGCGTCACCCGCCATCTGTGTGCAGGTGGGCCGACGAGACACTGTCCCTGATTTCTCCGCTTCTAATAGCACACACGGGGCAATACCAGCACAAGCCAGTCTCGCAGCAACGCTCGTCAGCAAACGAAAGAGCTTAAGGCTCGCCAATTCGCACTGTCAGGGTCGCTTGGGTGTTTTGCACTAGCGTCAGGTACGCTAGTATGCGTTCTTCCTTCCAGGGGTATGTGGCTGCGTGGTCAAATGTGCGGCATACGTATTTGCTCGACGTGTTTGCTCTCACGAACTTGACCTGGAGATCAAGGAGATGTTTCTTGTCGAACTGGACAGCGCTTCAACGGAACGGATCTACGTTACAGCCTGCATAATGAAAACGGAGTTGCCGACGACGAAAGCGACTTTGGGTTCTGTCTGTTGTCATTGGCGGAAAACTTCCGTTCAGGAGGCGGACACTGATTGACACGGTTTAGCAGAAGGTTTGAGGAATAGGTTAAATTGAGTGGTTTAATAACGGTATGTCTGGGATTAAAGTGTAGTATAGTGTGATTATCGGAGACGGTTTTAAGACACGAGTTCCCAAAATCAAGCGGGGTCATTACAACGGTTATTCCTGGTAGTTTAGGTGTACAATGTCCTGAAGAATATTTAAGAAAAAAGCACCCCTCATCGCCTAGAATTACCTACTACGGTCGACCATACCTTCGATTATCGCGGCCACTCTCGCATTAGTCGGCAGAGGTGGTTGTGTTGCGATAGCCCAGTATAATATTCTAAGGCGTTACCCTGATGAATATCCAACGGAATTGCTATAGGCCTTGAACGCTACACGGACGATACGAAATTATGTATGGACCGGGTCATCAAAAGGTTATACCCTTGTAGTTAACATGTAGCCCGGCCCTATTAGTACAGTAGTGCCTTGAATGGCATTCTCTTTATTAAGTTTTCTCTACAGCTAAACGATCAAGTGCACTTCCACAGAGCGCGGTGGAGATTCATTCACTCGGCAGCTCTGTAATAGGGACTAAAAAAGTGATGATAATCATGAGTGCCGCGTTATGGTGGTGTCGGAACAGAGCGGTCTTACGGCCAGTCGTATGCCTTCTCGAGTTCCGTCCAGTTAAGCGTGACAGTCCCAGTGTACCCACAAACCGTGATGGCTGTGCTTGGAGTCAATCGCAAGTAGGATGGTCTCCAGACACCGGGGCACCAGTTTTCACGCCGAAAGCATAAACGACGAGCAGATATGAAAGTGTTAGAACTGGACGTGCCGTTTCTCTGCGAAGAACACCTCGAGCTGTAGCGTTGTTGCGCTGCCTAGATGCAGTGTTGCTCATATCACATTTGCTTCAACGACTGCCGCCTTCGCTGTATCCCTAGACACTCAACAGTAAGCGCTTTTTGTAGGCAGGGGCACCCCCTATCAGTGACTGCGCCAAAACATCTTCGGATCCCCTTGTCCAATCAAACTCATCGAATTCTTACATTTAAGACCCTAATATCACATCATTAGTGATTAATTGCCACTGCCAAAATTCTGTCCAGAAGCGTTTTAGTTCGCTCCACTAAAGTTGTTTAAAACGACTACCAAATCCGCATGTTAGGGGATTTCTTATTAATTCTTTTATCGTGAGGAACAGCGGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTAATAGCGCGGGTGAGAGGGTAATCAGCCGTGTTCACCTACACAACGCTAACGGGCGATTCTATAAGATTCCGCATTGCGTCTACTTATAAGATGTCTCAACGGTATCCGCAACTTGTGAAGTGCCTACTATCCTTAAACGCATATCTCGCCCAGTAGCTTCCCAATATGTGAGCATCAATTGTTGTCCGGGCCGAGATAGTCATGTGCTCACGGAACTTACTGTATGAGTAGTGATTTGAAAGAGTTGTCAGTTTGCTGGTTCAGGTAAAGGTTCCTCACGCTACCTCAAAGTAAGAGAGCGGTCGTGACATTATCCGTGATTTTCTCACTACTATTAGTACTCACGACTCGATTCTGCCGCAGCCATGTTTCGCCAGAATGCCAGTCAGCATTAAGGAGAGCTCAGGGCAGGTCAACTCGCATAGTGAGGGTTACATGTTCGTTGGGCTCTTCCGACACGAACCTCAGTTAGCCTACATCCTACCAGAGGTCTGTGCCCCGGTGGTGAGAAGTGCGGATTTCGTATTTGCAGCTCGTCAGTACTTTCAGAATCATGGCCTGCACGGCAAAATGACGCTTATAATGGACTTCGACATGGCAATAACGCCTCGTTTCTACGTCAGGAGGAGAATAGTATAAACATAACTGCTGTCGGCAGAAGCGCCAAAGGAGTCTCTGAATTCTTATTCCCGAATAACATCCGTCTCCGTGCGGGAAAATCACCGACGGCGTTTTATAGAAGCCTAGGGGAACAGATTGGTCTAATTAGCTTAAGAGAGTAAATTCTGGGATCATTCAGTAGTAATCACAAATTTACGGTGGGGCTTTTTTGGCGGATCTTTACAGATACTAACCAGGTGATTTCAACTAATTTAGTTGACGATTTAGGCGCGCTATCCCGTAATCTTCAAATTAAAACATAGCGTTCCATGAGGGCTAGAATTACTTACCGGCCTTCACCATGCCTGCGTTATTCGCGCCCACTCTCCCATTTATCCGCGCAAGCGGATGCGATGCGATTGCCCGCTAAGATATTCTTACGTGTAACGTAGCTAAGTATTCTACAGAGCTGGCGTACGCGTTGAACACTTCACAGATGATAGGGATTCG
diff -r 000000000000 -r 9fee6d81910d test-data/sequence2.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sequence2.txt Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,1 @@
+CTCACTTGTGGATACCCCGACCTATTTTGACGGGACCACTCGCGGTAGTCGTTGGGCTTATGCACCGTAAAGTCCTCCGCCGGCCTCCCCCCTACAAAAGATGATAAGCTCCGGCAAGCAATATTGAACAACGCAAGGATCGGCGATATAAACAGAGAAACGGCTGATTACTCTTGTTGGTGTGGTATCGCTAAACTGCGTCGCGGAGCCTTATGGCATAGTCGTCCGCGGAGCACTCTGGTAACGCTTATGGTCCATAGCACATTCATCGCATCCGGGCATGCGCTCTATTTGACGATCCCTTGGCGCAGAGATGCTGGCCACGAGCTAAATTAAAGCGACTGCACTACTGTAAGGTCCGTCACGCAGACGACGGCCCGGGGAGAGCACTAACCCATCAACCTGTACGGGAACTTTCTATATCGTTCTCGGACGGAGAGATAACTACAGTGCCGCTTACAGCCCCTCTGTCGTCGCCGACGTCTGTAATATAGCCTTGTTGTGATTCCACCCTATTGAGGCATTGACTGATGCGGAAGGAGATCTGGAATGAACTGGTCTATGTGACAGAAACTGTGCAAGTACCTAATCTCGTTAGTGTAGGTTCTGACCGATACGTGCTTCGTTGAGAACTCACAATTTTACAACTGGGGACATAAGCCCTACGCCCATCATCTACTGACGTCCCTGAGGCTGCAGTTCATGTAATGGGACAGTATCCGCCGCAAGTTCTAGTGCAATGGCGGTATAGTACGCTCGTACTGTAGTAGAGGCGACACGGGTGGGATCATCACTAATAAGGATACTGGGAAGACTCACAGGCCTCCGCCTATAGGCGGTGCTTACTCTTACATAAAGCGGCTGTTAGTATTACCCCGCGAGGATTCGAAAAGGTGAACCAACCCGGTCGATCCGGAGGGACGGGCCTCAAAGCCGCGTGACGACGGCTGTCGGCCCGTAACAGAATCCCCGCAATAAGCTCCCGTGAGCCTCGATTGAACAGCCCTGGTGGGCCCCATCAGTAGCCCGAATATGTCGCTTTTCGGGTCCTGGGCCGAGGAGCGATACCTTCCAGTAATCGAGGCCGTTCGTTAATTCTTGTTGCGTTCCTAGCGCCTATATTTGTCTCTTTGCCGGCTTATGTGGACAAGCATAGCATAGCCATTTATCGGAGCGCCTCGGTACACGGTATGACCAGACGCCTCGTGAGACCATTACGTATACCAGGTGTCCTGTGAGCAGCGAAGGCCCATACGCGAGATACACTGCCAGAAATCCGCGTGATTACGAGTCGTGGTAAATTTAATCTGGCTGTGGTCTAGACATTCCAGGCGGTGCGTCTGCTGTCGGGTGCCTCTGGTGACTGGCTAGATGGACTTGCCGCTGGTAAACACACCATGACCCCGCCTCTCCATTGATGCCACGGCGAATGTCGGGGAGACAGCAGCGACTGCAGACATCAGATCAGAGTAATACTAACATGCGATAAGTCCCTAACTGACTATGGCCTTCTGTAGAGTCTACTTCACCAGATATGCTGTCTCTGGCACGTGGATGGTTTAGAGGAATCACATTCAAGTCTGGTTAACCATGAAACAAGTCTTGAGTGTAAAATTGTCGTCTCCTGTGTACGAGATGGAGGTACTAGATGACTGCAGGGACTCCGACGTTATGTACGTTGCTCCGTCAAAGGCGCCATTCAGGATCACGTTACCGCCAAAAAATGGGAGCAGGAGCTCTTCTCCCCTGCGGTCACGTCTATAGAAATTACACCTTTAACCCTCCTGAGAACCGGGAGGCGGGAATCCGTCACGTATGAGAAGGTATTTGCCCGACAATCAATACCGGACGCTCCTAAGTTTTTCCACTCGCTTGAGCCGGCTAGGCCTCTCTGCCCGAAGTTTCGACGGACTGCTGCCAACGCCCAGGCATAGTTTTAGGAGGATTATTCGGGGGCACTGGCAACCAACTTCTCGGGTCCTGCCCGACTGGTCTTCGGGCTAATATAGCGAATTGCCGAGAACCCGGCCCCACGCAATGGAACGTCCTTAGCTCCGGCAGGCAATTAAGGGGAACGTAAGTATAGCGCAAAAAAACAGAGAAATAGGCGAATGAATCTATTCTTACTGTATCGAAGAATGGCCTCGCGGAGGCATGTGTCATGCTAGCGTGCGGGGTACTCTAGTTATCCATATGGTCCACAGGACACTCGTTGCTTTCGGATTTGCCCTTTATGCGCCGGTTTTCAGCCACGCTTATGCTCAGCATCGTTATAACCAGACCGATACTAGATCTATAAAGTCCGCCATGCAGACGAGACCAGACGGAGATTACCGAGCAATCTATCAGATCGGCGACCATTAGTGAGCTACTGGAGCCGAGGGGTAACTACGATGCCGCTAAGAACCTCTCGGTCGTCGCTAGCGATTACACTCCAGTCTCATTATAATCGTTCGCTATTCAGGGATTGACCAACACCGGAAAACATTTCACTTGAAGTATTGTATACGACAGAGTCCGTGCACCTACCAAACCTGTTTAATCTAAGTTCAGACTAGTTGGAAGTATGTCTAGATCTCAGATTTCGTCACTAGAGGGCCCACGCTCTATTTTTATGATCCATTGATCTCCCTGACGCTGCAAGATTTGCAACCAGGCAGACTTGGCGGTAGGTCCTAGTGCAGCGGGGCTTTTTTTCTATAGTCCTTGAGAGGAGGAGACGTCAGTCCAGATATCTTTGATGTCCTGATTGGAAGGACCGTTGGCCCCCCACCCTTAGGCAGTGTATACTCTTCCATAAACGAGCTATTAGTTATGAGGTCCGTAGATTGAAAAGGGTGACGGAATTCGGCCGAACGGGAAAGACGGACATCTAGGTATCCTGAGCACGGTTGCGCGTCCGTATCAAGCTCCTCTTTATAGGCCCCGGTTACTGTTGGTCGTAGAGCCCAGAACGGGTTGGGCAGATGTACGACAATATCGCTTAGTCACCCTTGGGCCACGGTCCGCTACCTTACAGGAATTGAGACCGTCCATTAATTTCCCTTGCATATATATTGCGTTTCTTCGACCTTTTAACCGCTCTCTTAGAAGAGAGACAGATAGCTTCTTACCCGTGCCCCACCGTTGGCAGTACGATCGCACGCCCCACGTGAACGATTGGTAAACCCTGTGGCCTGTGAGCGACAAAAGCTTTAATGGGAAATACGCGCCCATAACTTGGTGCGAATACGGGTCGTAGCAATGTTCGTCTGAGTATGATCTATATAATACGGGCGGTACGTCTGCTTTGGTCAGCCTCTAATGGCTCGTATGATAGTGCAGCCGCTGGTGATCACTCAATGATCTCGGCTCCCCGTTGCAACTACGGGGATTCTTGGAGAGCCAGCTGCGTTCGGTATTGTGAGGACAGTGTAGTATTAGCAAACGATAAGTCCCGAACTAGTTGTGACCTAACGAAAAGAGAATTTCATAATACGTGCTGTCCCACGCGCATGGTACATTTGGACAATATTGAATGGAGTCTGATCAACCTTCACACCGATCTAGAATCGAATGCAAAGATCACCCAGGTGCAAATCAAAAATTCTAGGTAACTAGAAGATTTGCGACGTTCTAAGTGTTGGACGATATGAATCGCGACCCAGGATGACGTCGCCCTGAAAAAAAGATTTCTGCAACTCTCCTCGTCAGCAGTCTGGTGTATCGAAAGTACAGGACTAGCCTTCCTAGCAACCGCGGGCTGGGAATCTGAGACATGAGTCAAGATATTTGCTCGGTAACGTATGCTCTAGGCATCTAACTATTCCCTGTGTCTTATAGGGGCCTGCGTTATCTGCCTGTCGAACCATAGGATTCGTGTCAGCGCGCAGGCTTGGATCGAGATGAAATCTCCGGGGCCTAAGACTACGAGCATCTGGCGTCTTGGCTAACCCCCCTACATGTTGTTATAAACAATCAGTGGAAACCCAGTGCTAGAGGATGGAATGACCTTAAATCAGGGACGATATTAAACGGAACGTATATTCAACGCAATGAAGCCGGAGGATTGGCGTGGGAATCGTGCTTCTGTCTAAGCAAGTAAGGGTATGAGGTCGCAACCGTCCCCCAAGCGTACAGGGTGCACTTTGTAACGATTTGGGAGTCCAGAGACTCGCTGTTTTCGAAATTTGCCCTCAAGCGCGAGTATTGAACCAGGCTTACGCCCAAGAACGTAGCAAGCTGACTCAAACAAAATACATTTTGCCCGCGTTACATATGAATCAAGTTGGAAGTTATGGAGCATAGTAACATGTGGACGGCCAGTGGTGGGTTGCTACACCCCTGCGGCAACGTTGAAGCTCCTGGATTACACTGGCTGGATCTAAGCCGTGACACCCGTCATACTCCATAACCGTCTGTAACTCACGGCTTGTTCTGGACTGGATTGCCATTCTCTCAGAGTATTATGCAGGCCGGCGTACGGGTCCCATATAAACCTGTCATAGCTTACCTGACTCTACTTGGAAATGTGGCTAGGTCTTTGCCCACGCACCTAATCGGTCCTCGTTTGCTTTTTAGGACCCGATGAACTACAGAACACTGCAAGAATCTCTACCTGCTTTACAAAGTGCTGGATCCTATTCCAGCGGGATCTTTTATCTAAACACGATGAGAGGAGTATTCGTCAGGCCACATAGCTTTCTTGTTCTGATCGGAACGATCGTTGGCGCCCGACCCCCCGATTCCATAGTGAGTTCTTCGTCCGAGCCATTGTATGCGAGATCGATAGACTGATAGGGGATGCAGTATATCCCTGGATACAATAGACGCACAGGTTGGAATCCTAAGTGAAGTCGCGCGTCCGAACCCAGCTCTATTTTAGAGGTCATGGGTTCTGGTGCCCGCGAGCCGCGGAACCGATTAGGGGCATGTACAACAATATTTATTAGTCATCTTTCAGACACAATCTCCCAGCTCACTGGTATATAGTTCCTGCTATAATTAGCCTCCCTCATAAGTTGCACTACTTCAGCGTCCCAAATGCACCCTTACCACGAAGACAGGATTGTCCGATCCCATATTACGACCTTGGCAGGGGGTTCGCAAGTCCCACCCCAAACGATGCTGAAGGCTCAGGTTTCACAGGGACAAAAGCTTTAAACGCGAGTTCCCGCTCATAACCTGGACCGAATGCAGAATCATGCATCGTTCCACTGTGTTCGTGTCATCTAGGACGGGCGCAAAGGATATATAATTCAATTTTGAATACCTTATATTATTGTACACCTACCGGTCACCAGCCAACAATGTGCGGATGGCGTTACAACTTTCTGGGCCTAATCTGACCGTTCTAGATACCGCACTCTGGGCAATACGAGGTAAAGCCAGTCACCCAGTGTCGATCAACACCTAACCTAACGGTAAGAGGCTCACATAATGGCACTGTCGGCGTCCCCAGGGTATTTTACGTTAGCATCAGGTGGACTAACATGAATCTTTACTCCCAAGCGAAAACGGGTGCGTGGACTAGCGAGGAGCAAACGAAAATTCTTGGCCTGCTTGGTGTCTCGTATTCCTCTTAGAGATCGACGAAATGTTTCACGACCAAGGGAAAGGTCGCCCTACAAAATAGATTTGCGTTACTCTCTCCATAAGGAGTCCGGTGTAGCGAAGGATCAAGGCGACCCTAGGTAGCAACCGCCGGCTTCGGCGGTAAGGTATCACTCAAGAAGCAGACACAGTAAGACACGGTCTAGCTGACTGTCTATCGGCTAGGTCAAATAGAGAGCTTTGATATCTGCATGTCTAGCTTTAGAATTCAGTTTAGCGCGCTGATCTGAGTCGAGATAAAATCACCAGTACCCAAGACCAGGGGGGCTCGCCACGTTGGCTAATCCTGGTACATCTTGTAATCAATATTCAGTAGAAAATTTGTGTTAGAAGGACGAGTCACCATGTACCAATAGCGATAACGATCGGTCGGACTATTCATTGTGGTGATGACGCTGGGTTTACGTGGGAAAGGTGCTTGTGTCCCGACAGGCTAGGATATAATGCTGAGGCCCTTCCCCAAGCGTTCAGCGTGGGATTTGCTACAACTTCCGAGTCCTACATGTGCGTGTTCATGTTATGTATGCACAAGGCCGAGAATAGGACGTAGCCTTCGAGTTAGTACGTAGCGTGGTCGCACAAGCACAGTAGATCCTCCCCGCGCATCCTATTTATTAAGTTAATTCTAAAGCAATACGATCACATGTGGATGGGCAGTGGCCGGTTGTTACACGCCTACCGCGGTGCTGAATGACCGGGACTAAAGAGGCGAAGATTATGGCGTGTGACCCGTTATGCTCGAGTTCGGTCAGTGCGTCATTGCAAGTAGTCGATTGCTTTCTCAATCTCCGAGCGATTTAGCGTGACAGCCCCAGGGAACCCATAAAATGTGATCGCAGTCCATCCGATCGTACATAGAAAGGAAGGTCCCCATACGCCCACGCACCTGTTTACTCGTCGTATGCATAAAAGAGCCGCACGAACCACAGAGCATAAAGAGGACCTCTAGTTCCTTTACAAAGTACAGGTTCGCTTTTCGGCGAGATGCCTTACCTAGATGCAATGACGGACGTATTCCTCTGGCCACATCGGTTCCTGCTTTCGCTGGGATCCAAGATTGGCAGCTGAAGCCGCCTTTCCATAGTGAGTCCTTCGTCTGTGACTAACTGTGCCAAATCGTCTAGCAAACTGCTGATCCAGTTTAACTCACCAAATTATAGCCGTACAGACCGAAATCTTAAGTCATATCACGCGACTAGCCTCTGCTTAATTTTTGTGCTCAAGGGTTTTGGTCCGCCCGAGCGGTGCAGCCGATTAGGACCATGTAATACATTTGTTACAAGACTTCTTTTAAACACTTTCTTCCTGCCCAGTAGCGGATGATAATCGTTGTTGCCAGCCGGCGTGGAAGGTAACAGCACCGGTGCGAGCCTAATGTGCCGTCTCCACGAACACAAGGCTGTCCGATCGTATAATAGGATTCCGCAATGGGGTTAGCAAGTGGCAGCCTAAACGATATCGGGGACTTGCGATGTACATGCTTTGGTACAATACATACGTGATCCAGTTGTTATCCTGCATCGGAACATCAATTGTGCATCGGACCAGCATATTCATGTCATCTAGGAGGCGCGCGTAGGATAAATAATTCAATTAAGATGTCGTTTTGCTAGTATACGTCTAGGCGTCACCCGCCATCTGTGTGCAGGTGGGCCGACGAGACACTGTCCCTGATTTCTCCGCTTCTAATAGCACACACGGGGCAATACCAGCACAAGCCAGTCTCGCAGCAACGCTCGTCAGCAAACGAAAGAGCTTAAGGCTCGCCAATTCGCACTGTCAGGGTCGCTTGGGTGTTTTGCACTAGCGTCAGGTACGCTAGTATGCGTTCTTCCTTCCAGGGGTATGTGGCTGCGTGGTCAAATGTGCGGCATACGTATTTGCTCGACGTGTTTGCTCTCACGAACTTGACCTGGAGATCAAGGAGATGTTTCTTGTCGAACTGGACAGCGCTTCAACGGAACGGATCTACGTTACAGCCTGCATAATGAAAACGGAGTTGCCGACGACGAAAGCGACTTTGGGTTCTGTCTGTTGTCATTGGCGGAAAACTTCCGTTCAGGAGGCGGACACTGATTGACACGGTTTAGCAGAAGGTTTGAGGAATAGGTTAAATTGAGTGGTTTAATAACGGTATGTCTGGGATTAAAGTGTAGTATAGTGTGATTATCGGAGACGGTTTTAAGACACGAGTTCCCAAAATCAAGCGGGGTCATTACAACGGTTATTCCTGGTAGTTTAGGTGTACAATGTCCTGAAGAATATTTAAGAAAAAAGCACCCCTCATCGCCTAGAATTACCTACTACGGTCGACCATACCTTCGATTATCGCGGCCACTCTCGCATTAGTCGGCAGAGGTGGTTGTGTTGCGATAGCCCAGTATAATATTCTAAGGCGTTACCCTGATGAATATCCAACGGAATTGCTATAGGCCTTGAACGCTACACGGACGATACGAAATTATGTATGGACCGGGTCATCAAAAGGTTATACCCTTGTAGTTAACATGTAGCCCGGCCCTATTAGTACAGTAGTGCCTTGAATGGCATTCTCTTTATTAAGTTTTCTCTACAGCTAAACGATCAAGTGCACTTCCACAGAGCGCGGTGGAGATTCATTCACTCGGCAGCTCTGTAATAGGGACTAAAAAAGTGATGATAATCATGAGTGCCGCGTTATGGTGGTGTCGGAACAGAGCGGTCTTACGGCCAGTCGTATGCCTTCTCGAGTTCCGTCCAGTTAAGCGTGACAGTCCCAGTGTACCCACAAACCGTGATGGCTGTGCTTGGAGTCAATCGCAAGTAGGATGGTCTCCAGACACCGGGGCACCAGTTTTCACGCCGAAAGCATAAACGACGAGCAGATATGAAAGTGTTAGAACTGGACGTGCCGTTTCTCTGCGAAGAACACCTCGAGCTGTAGCGTTGTTGCGCTGCCTAGATGCAGTGTTGCTCATATCACATTTGCTTCAACGACTGCCGCCTTCGCTGTATCCCTAGACACTCAACAGTAAGCGCTTTTTGTAGGCAGGGGCACCCCCTATCAGTGACTGCGCCAAAACATCTTCGGATCCCCTTGTCCAATCAAACTCATCGAATTCTTACATTTAAGACCCTAATATCACATCATTAGTGATTAATTGCCACTGCCAAAATTCTGTCCAGAAGCGTTTTAGTTCGCTCCACTAAAGTTGTTTAAAACGACTACCAAATCCGCATGTTAGGGGATTTCTTATTAATTCTTTTATCGTGAGGAACAGCGGATCTTAATGGATGGCCGCAGGTGGTATGGAAGCTAATAGCGCGGGTGAGAGGGTAATCAGCCGTGTTCACCTACACAACGCTAACGGGCGATTCTATAAGATTCCGCATTGCGTCTACTTATAAGATGTCTCAACGGTATCCGCAACTTGTGAAGTGCCTACTATCCTTAAACGCATATCTCGCCCAGTAGCTTCCCAATATGTGAGCATCAATTGTTGTCCGGGCCGAGATAGTCATGTGCTCACGGAACTTACTGTATGAGTAGTGATTTGAAAGAGTTGTCAGTTTGCTGGTTCAGGTAAAGGTTCCTCACGCTACCTCAAAGTAAGAGAGCGGTCGTGACATTATCCGTGATTTTCTCACTACTATTAGTACTCACGACTCGATTCTGCCGCAGCCATGTTTCGCCAGAATGCCAGTCAGCATTAAGGAGAGCTCAGGGCAGGTCAACTCGCATAGTGAGGGTTACATGTTCGTTGGGCTCTTCCGACACGAACCTCAGTTAGCCTACATCCTACCAGAGGTCTGTGCCCCGGTGGTGAGAAGTGCGGATTTCGTATTTGCAGCTCGTCAGTACTTTCAGAATCATGGCCTGCACGGCAAAATGACGCTTATAATGGACTTCGACATGGCAATAACGCCTCGTTTCTACGTCAGGAGGAGAATAGTATAAACATAACTGCTGTCGGCAGAAGCGCCAAAGGAGTCTCTGAATTCTTATTCCCGAATAACATCCGTCTCCGTGCGGGAAAATCACCGACGGCGTTTTATAGAAGCCTAGGGGAACAGATTGGTCTAATTAGCTTAAGAGAGTAAATTCTGGGATCATTCAGTAGTAATCACAAATACGGTGGGGCTTTTTTGGCGGATCTTTACAGATACTAACCAGGTGATTTCAACTAATTTAGTTGACGATTTAGGCGCGCTATCCCGTAATCTTCAAATTAAAACATAGCGTTCCATGAGGGCTAGAATTACTTACCGGCCTTCACCATGCCTGCGTTATTCGCGCCCACTCTCCCATTTATCCGCGCAAGCGGATGCGATGCGATTGCCCGCTAAGATATTCTTACGTGTAACGTAGCTAAGTATTCTACAGAGCTGGCGTACGCGTTGAACACTTCACAGATGATAG
diff -r 000000000000 -r 9fee6d81910d test-data/test1.png
Binary file test-data/test1.png has changed
diff -r 000000000000 -r 9fee6d81910d test-data/test1_copy.png
Binary file test-data/test1_copy.png has changed
diff -r 000000000000 -r 9fee6d81910d test-data/test1_iscc.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test1_iscc.txt Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,1 @@
+K4AOMGOGQJA4Y46PAC4YPPA63GKD5RVFPR7FU3I4OOEW44TYXNYOTMY
\ No newline at end of file
diff -r 000000000000 -r 9fee6d81910d test-data/test2.tiff
Binary file test-data/test2.tiff has changed
diff -r 000000000000 -r 9fee6d81910d test-data/test2_similar.tiff
Binary file test-data/test2_similar.tiff has changed
diff -r 000000000000 -r 9fee6d81910d test-data/test3.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test3.fasta Fri Dec 19 15:02:49 2025 +0000
@@ -0,0 +1,58 @@
+>sp|P51587|BRCA2_HUMAN Breast cancer type 2 susceptibility protein OS=Homo sapiens OX=9606 GN=BRCA2 PE=1 SV=4
+MPIGSKERPTFFEIFKTRCNKADLGPISLNWFEELSSEAPPYNSEPAEESEHKNNNYEPN
+LFKTPQRKPSYNQLASTPIIFKEQGLTLPLYQSPVKELDKFKLDLGRNVPNSRHKSLRTV
+KTKMDQADDVSCPLLNSCLSESPVVLQCTHVTPQRDKSVVCGSLFHTPKFVKGRQTPKHI
+SESLGAEVDPDMSWSSSLATPPTLSSTVLIVRNEEASETVFPHDTTANVKSYFSNHDESL
+KKNDRFIASVTDSENTNQREAASHGFGKTSGNSFKVNSCKDHIGKSMPNVLEDEVYETVV
+DTSEEDSFSLCFSKCRTKNLQKVRTSKTRKKIFHEANADECEKSKNQVKEKYSFVSEVEP
+NDTDPLDSNVANQKPFESGSDKISKEVVPSLACEWSQLTLSGLNGAQMEKIPLLHISSCD
+QNISEKDLLDTENKRKKDFLTSENSLPRISSLPKSEKPLNEETVVNKRDEEQHLESHTDC
+ILAVKQAISGTSPVASSFQGIKKSIFRIRESPKETFNASFSGHMTDPNFKKETEASESGL
+EIHTVCSQKEDSLCPNLIDNGSWPATTTQNSVALKNAGLISTLKKKTNKFIYAIHDETSY
+KGKKIPKDQKSELINCSAQFEANAFEAPLTFANADSGLLHSSVKRSCSQNDSEEPTLSLT
+SSFGTILRKCSRNETCSNNTVISQDLDYKEAKCNKEKLQLFITPEADSLSCLQEGQCEND
+PKSKKVSDIKEEVLAAACHPVQHSKVEYSDTDFQSQKSLLYDHENASTLILTPTSKDVLS
+NLVMISRGKESYKMSDKLKGNNYESDVELTKNIPMEKNQDVCALNENYKNVELLPPEKYM
+RVASPSRKVQFNQNTNLRVIQKNQEETTSISKITVNPDSEELFSDNENNFVFQVANERNN
+LALGNTKELHETDLTCVNEPIFKNSTMVLYGDTGDKQATQVSIKKDLVYVLAEENKNSVK
+QHIKMTLGQDLKSDISLNIDKIPEKNNDYMNKWAGLLGPISNHSFGGSFRTASNKEIKLS
+EHNIKKSKMFFKDIEEQYPTSLACVEIVNTLALDNQKKLSKPQSINTVSAHLQSSVVVSD
+CKNSHITPQMLFSKQDFNSNHNLTPSQKAEITELSTILEESGSQFEFTQFRKPSYILQKS
+TFEVPENQMTILKTTSEECRDADLHVIMNAPSIGQVDSSKQFEGTVEIKRKFAGLLKNDC
+NKSASGYLTDENEVGFRGFYSAHGTKLNVSTEALQKAVKLFSDIENISEETSAEVHPISL
+SSSKCHDSVVSMFKIENHNDKTVSEKNNKCQLILQNNIEMTTGTFVEEITENYKRNTENE
+DNKYTAASRNSHNLEFDGSDSSKNDTVCIHKDETDLLFTDQHNICLKLSGQFMKEGNTQI
+KEDLSDLTFLEVAKAQEACHGNTSNKEQLTATKTEQNIKDFETSDTFFQTASGKNISVAK
+ESFNKIVNFFDQKPEELHNFSLNSELHSDIRKNKMDILSYEETDIVKHKILKESVPVGTG
+NQLVTFQGQPERDEKIKEPTLLGFHTASGKKVKIAKESLDKVKNLFDEKEQGTSEITSFS
+HQWAKTLKYREACKDLELACETIEITAAPKCKEMQNSLNNDKNLVSIETVVPPKLLSDNL
+CRQTENLKTSKSIFLKVKVHENVEKETAKSPATCYTNQSPYSVIENSALAFYTSCSRKTS
+VSQTSLLEAKKWLREGIFDGQPERINTADYVGNYLYENNSNSTIAENDKNHLSEKQDTYL
+SNSSMSNSYSYHSDEVYNDSGYLSKNKLDSGIEPVLKNVEDQKNTSFSKVISNVKDANAY
+PQTVNEDICVEELVTSSSPCKNKNAAIKLSISNSNNFEVGPPAFRIASGKIVCVSHETIK
+KVKDIFTDSFSKVIKENNENKSKICQTKIMAGCYEALDDSEDILHNSLDNDECSTHSHKV
+FADIQSEEILQHNQNMSGLEKVSKISPCDVSLETSDICKCSIGKLHKSVSSANTCGIFST
+ASGKSVQVSDASLQNARQVFSEIEDSTKQVFSKVLFKSNEHSDQLTREENTAIRTPEHLI
+SQKGFSYNVVNSSAFSGFSTASGKQVSILESSLHKVKGVLEEFDLIRTEHSLHYSPTSRQ
+NVSKILPRVDKRNPEHCVNSEMEKTCSKEFKLSNNLNVEGGSSENNHSIKVSPYLSQFQQ
+DKQQLVLGTKVSLVENIHVLGKEQASPKNVKMEIGKTETFSDVPVKTNIEVCSTYSKDSE
+NYFETEAVEIAKAFMEDDELTDSKLPSHATHSLFTCPENEEMVLSNSRIGKRRGEPLILV
+GEPSIKRNLLNEFDRIIENQEKSLKASKSTPDGTIKDRRLFMHHVSLEPITCVPFRTTKE
+RQEIQNPNFTAPGQEFLSKSHLYEHLTLEKSSSNLAVSGHPFYQVSATRNEKMRHLITTG
+RPTKVFVPPFKTKSHFHRVEQCVRNINLEENRQKQNIDGHGSDDSKNKINDNEIHQFNKN
+NSNQAVAVTFTKCEEEPLDLITSLQNARDIQDMRIKKKQRQRVFPQPGSLYLAKTSTLPR
+ISLKAAVGGQVPSACSHKQLYTYGVSKHCIKINSKNAESFQFHTEDYFGKESLWTGKGIQ
+LADGGWLIPSNDGKAGKEEFYRALCDTPGVDPKLISRIWVYNHYRWIIWKLAAMECAFPK
+EFANRCLSPERVLLQLKYRYDTEIDRSRRSAIKKIMERDDTAAKTLVLCVSDIISLSANI
+SETSSNKTSSADTQKVAIIELTDGWYAVKAQLDPPLLAVLKNGRLTVGQKIILHGAELVG
+SPDACTPLEAPESLMLKISANSTRPARWYTKLGFFPDPRPFPLPLSSLFSDGGNVGCVDV
+IIQRAYPIQWMEKTSSGLYIFRNEREEEKEAAKYVEAQQKRLEALFTKIQEEFEEHEENT
+TKPYLPSRALTRQQVRALQDGAELYEAVKNAADPAYLEGYFSEEQLRALNNHRQMLNDKK
+QAQIQLEIRKAMESAEQKEQGLSRDVTTVWKLRIVSYSKKEKDSVILSIWRPSSDLYSLL
+TEGKRYRIYHLATSKSKSKSERANIQLAATKKTQYQQLPVSDEILFQIYQPREPLHFSKF
+LDPDFQPSCSEVDLIGFVVSVVKKTGLAPFVYLSDECYNLLAIKFWIDLNEDIIKPHMLI
+AASNLQWRPESKSGLLTLFAGDFSVFSASPKEGHFQETFNKMKNTVENIDILCNEAENKL
+MHILHANDPKWSTPTKDCTSGPYTAQIIPGTGNKLLMSSPNCEIYYQSPLSLCMAKRKSV
+STPVSAQMTSKSCKGEKEIDDQKNCKKRRALDFLSRLPLPPPVSPICTFVSPAAQKAFQP
+PRSCGTKYETPIKKKELNSPQMTPFKKFNEISLLESNSIADEELALINTQALLSGSTGEK
+QFISVSESTRTAPTSSEDYLRLKRRCTTSLIKEQESSQASTEECEKNKQDTITTKKYI