comparison iscc_sum.xml @ 1:2812afc5f30a draft default tip

planemo upload for repository https://github.com/BMCV/galaxy-image-analysis/tree/master/tools/iscc-sum commit 6db86b8b65a0e05b7f3541d505fbe900633fc72a
author imgteam
date Fri, 19 Dec 2025 15:02:55 +0000
parents 01155dd89628
children
comparison
equal deleted inserted replaced
0:01155dd89628 1:2812afc5f30a
1 <tool id="iscc_sum" name="Generate ISCC hash" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.1"> 1 <tool id="iscc_sum" name="Generate ISCC-CODE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.1">
2 <description>with ISCC-SUM</description> 2 <description>with ISCC-SUM</description>
3 3
4 <macros> 4 <macros>
5 <import>macros.xml</import> 5 <import>macros.xml</import>
6 <import>creators.xml</import> 6 <import>creators.xml</import>
13 <expand macro="creators/maartenpaul" /> 13 <expand macro="creators/maartenpaul" />
14 <expand macro="creators/etzm" /> 14 <expand macro="creators/etzm" />
15 </creator> 15 </creator>
16 16
17 <command detect_errors="exit_code"><![CDATA[ 17 <command detect_errors="exit_code"><![CDATA[
18 iscc-sum '$input_file' | cut -d':' -f2 | cut -d' ' -f1 > '${output_file}' 18 ## Generate ISCC-CODE for input dataset
19 iscc-sum '${input_file}' | cut -d':' -f2 | cut -d' ' -f1 >> '${output_file}'
19 ]]></command> 20 ]]></command>
20 21
21 <inputs> 22 <inputs>
22 <param name="input_file" type="data" format="data" label="Input File" 23 <param name="input_file" type="data" format="data" label="Input file"
23 help="Any file type - ISCC-SUM will generate a checksum and similarity hash"/> 24 help="Any file type - ISCC-SUM will generate an ISCC-CODE for content identification and verification. When a collection is provided, generates one ISCC-CODE per dataset."/>
24 </inputs> 25 </inputs>
25 26
26 <outputs> 27 <outputs>
27 <data name="output_file" format="txt" label="${tool.name} on ${on_string}"/> 28 <data name="output_file" format="txt" label="${tool.name} on ${on_string}"/>
28 </outputs> 29 </outputs>
29 30
30 <tests> 31 <tests>
32 <!-- Test 1: Single dataset PNG -->
31 <test expect_num_outputs="1"> 33 <test expect_num_outputs="1">
32 <param name="input_file" value="test1.png"/> 34 <param name="input_file" value="test1.png"/>
33 <output name="output_file"> 35 <output name="output_file">
34 <assert_contents> 36 <assert_contents>
35 <has_line line="K4AOMGOGQJA4Y46PAC4YPPA63GKD5RVFPR7FU3I4OOEW44TYXNYOTMY" /> 37 <has_line line="K4AOMGOGQJA4Y46PAC4YPPA63GKD5RVFPR7FU3I4OOEW44TYXNYOTMY" />
38 <has_n_lines n="1" />
36 </assert_contents> 39 </assert_contents>
37 </output> 40 </output>
38 </test> 41 </test>
42
43 <!-- Test 2: Single dataset FASTA -->
44 <test expect_num_outputs="1">
45 <param name="input_file" value="test3.fasta"/>
46 <output name="output_file">
47 <assert_contents>
48 <has_line line="K4AKF7PTZ7JTAAYZ7YZHZPR5RETKYXXE7RTBTJA4JX5GQQMSLZRC6QQ" />
49 <has_n_lines n="1" />
50 </assert_contents>
51 </output>
52 </test>
53
54 <!-- Test 3: Single dataset TIFF -->
39 <test expect_num_outputs="1"> 55 <test expect_num_outputs="1">
40 <param name="input_file" value="test2.tiff"/> 56 <param name="input_file" value="test2.tiff"/>
41 <output name="output_file"> 57 <output name="output_file">
42 <assert_contents> 58 <assert_contents>
43 <has_line line="K4AGSPOSB5SS2X427WZ27QASTSBVTS55DXLMFDF7WOJKEOSTDEI3OXQ" /> 59 <has_line line="K4AGSPOSB5SS2X427WZ27QASTSBVTS55DXLMFDF7WOJKEOSTDEI3OXQ" />
44 </assert_contents> 60 <has_n_lines n="1" />
45 </output>
46 </test>
47 <test expect_num_outputs="1">
48 <param name="input_file" value="test3.fasta"/>
49 <output name="output_file">
50 <assert_contents>
51 <has_line line="K4AKF7PTZ7JTAAYZ7YZHZPR5RETKYXXE7RTBTJA4JX5GQQMSLZRC6QQ" />
52 </assert_contents> 61 </assert_contents>
53 </output> 62 </output>
54 </test> 63 </test>
55 </tests> 64 </tests>
56 65
57 <help><![CDATA[ 66 <help><![CDATA[
58 **What it does** 67 What it does
68 ============
59 69
60 Generates an International Standard Content Code (ISCC) based checksum and similarity hash from any input file. 70 Generates an ISCC-CODE (International Standard Content Code) for datasets using the ISCC-SUM algorithm.
61 71
62 The ISCC-SUM is a content-derived identifier that: 72 ISCC-SUM derives an identifier from the content of a dataset. The tool:
63 - Creates a unique checksum based on file content
64 - Generates a similarity hash for content comparison
65 - Works with any file format
66 73
67 **Input** 74 - Creates a unique 55-character ISCC-CODE based on dataset content
75 - Enables both exact matching (checksum) and similarity detection
76 - Works with any file format
68 77
69 Any file format is accepted as input. 78 Dataset Mapping
79 ===============
70 80
71 **Output** 81 When you provide a collection, Galaxy automatically runs this tool once per dataset, generating individual ISCC-CODEs for each dataset in the collection.
72 82
73 A text file containing the ISCC-SUM code for the input file. 83 Output
84 ======
74 85
75 **More Information** 86 A text file containing the ISCC-CODE (55 characters) of the input dataset.
76 87
77 For more details about ISCC, visit: https://iscc.codes/ 88 Example output::
89
90 K4AOMGOGQJA4Y46PAC4YPPA63GKD5RVFPR7FU3I4OOEW44TYXNYOTMY
91
92 Use Cases
93 =========
94
95 - Generate ISCC-CODEs for file integrity verification
96 - Create content identifiers for duplicate detection
97 - Track file provenance and changes over time
98 - Enable similarity-based file comparison
99
100 ISCC-CODE Structure
101 ===================
102
103 The 55-character ISCC-CODE is composed of multiple ISCC-UNITs:
104
105 - **Data-Code**: Content similarity hash (enables fuzzy matching for similar files)
106 - **Instance-Code**: Exact file checksum (for bit-perfect verification)
107
108 This combination creates an ISCC-CODE with SubType SUM, hence the name ISCC-SUM.
109
110 Workflow Examples
111 =================
112
113 Generate ISCC-CODEs for a collection
114 -------------------------------------
115
116 ::
117
118 Input: Collection of 100 datasets
119
120 [Generate ISCC-CODE] ← runs 100 times
121
122 Output: Collection of 100 ISCC-CODE files
123
124 [Collapse Collection] ← Galaxy tool
125
126 Result: Single file with all ISCC-CODEs
127
128 Create reference ISCC-CODEs
129 ----------------------------
130
131 ::
132
133 Input: Original datasets
134
135 [Generate ISCC-CODE]
136
137 Store ISCC-CODEs for future verification
138
139 Compare datasets
140 ----------------
141
142 ::
143
144 Dataset A → [Generate ISCC-CODE] → ISCC-CODE A
145 Dataset B → [Generate ISCC-CODE] → ISCC-CODE B
146
147 Compare ISCC-CODEs
148
149 Result: Exact match or similarity score
150
151 More Information
152 ================
153
154 - For details about ISCC: https://sum.iscc.codes/ and https://iscc.codes/
155 - For ISCC structure and subtypes: https://ieps.iscc.codes/iep-0001/
78 ]]></help> 156 ]]></help>
79 157
80 <expand macro="citations" /> 158 <expand macro="citations" />
81 159
82 </tool> 160 </tool>