comparison teloscope.xml @ 0:be2c72b9798b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/teloscope commit 91ae0a450a15bf0a37c017b57831d40e2cea8dbc
author iuc
date Thu, 27 Nov 2025 08:52:19 +0000
parents
children 089c6e8122c8
comparison
equal deleted inserted replaced
-1:000000000000 0:be2c72b9798b
1 <tool id="teloscope" name="Teloscope" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Assembly telomere annotation</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <version_command>teloscope -v</version_command>
8 <command detect_errors="exit_code"><![CDATA[
9 mkdir -p output &&
10 teloscope
11 --input-sequence '$input_sequence'
12 --output output
13 --canonical '$canonical'
14 --patterns '$patterns'
15 --window '$window'
16 --step '$step'
17 --threads "\${GALAXY_SLOTS:-2}"
18 --terminal-limit '$terminal_limit'
19 --max-match-distance '$max_match_distance'
20 --max-block-distance '$max_block_distance'
21 --min-block-length '$min_block_length'
22 --min-block-density '$min_block_density'
23 ## Each boolean below renders as its flag or as an empty string. Standard output is saved under a fixed file name: a dataset name is free text chosen by the user and must never be spliced unquoted into the shell line.
24 $out_win_repeats
25 $out_gc
26 $out_entropy
27 $out_matches
28 $out_its
29 $ultra_fast
30 $verbose
31 > output/teloscope.telo.report
32 ]]></command>
33 <inputs> <!-- Form parameters; the command template and the tests refer to each one by its argument with the leading dashes dropped and inner dashes turned into underscores -->
34 <param argument="--input-sequence" type="data" format="fasta,fasta.gz" label="Input assembly"/>
35 <param argument="--canonical" type="text" value="TTAGGG" label="Canonical telomeric pattern">
36 <sanitizer> <!-- valid set starts from string.printable with the space, \t, \r and \n entries removed; the regex validator below then limits the value to A/C/G/T in either case -->
37 <valid initial="string.printable">
38 <remove value=" "/>
39 <remove value="\t"/>
40 <remove value="\r"/>
41 <remove value="\n"/>
42 </valid>
43 </sanitizer>
44 <validator type="regex" message="Canonical must contain only A,C,G,T.">^[ACGTacgt]+$</validator>
45 </param>
46 <!-- Motif list: the validator accepts one or more runs of IUPAC nucleotide letters (upper or lower case) separated by single commas, with no spaces and no empty items -->
47 <param argument="--patterns" type="text" value="TTAGGG,CCCTAA"
48 label="Patterns to explore (comma-separated), IUPAC allowed">
49 <validator type="regex"
50 message="Use IUPAC letters ACGTRYSWKMBDHVN; comma-separated.">^[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+(?:,[ACGTRYSWKMBDHVNacgtryswkmbdhvn]+)*$</validator>
51 </param>
52 <!-- Numeric settings: the min/max attributes bound what the form accepts; each value is passed single-quoted to teloscope by the command template -->
53 <param argument="--window" type="integer" min="100" value="1000" label="Sliding window size"/>
54 <param argument="--step" type="integer" min="100" value="500" label="Sliding window step"/>
55 <param argument="--terminal-limit" type="integer" min="10000" value="50000" label="Terminal limit (bp) in contigs"/>
56 <param argument="--max-match-distance" type="integer" min="0" value="50" label="Maximum distance (bp) for merging matches"/>
57 <param argument="--max-block-distance" type="integer" min="0" value="200" label="Maximum block distance for merging"/>
58 <param argument="--min-block-length" type="integer" min="0" value="500" label="Minimum block length"/>
59 <param argument="--min-block-density" type="float" min="0" max="1" value="0.5" label="Minimum block density (0–1)"/>
60 <!-- Boolean switches: truevalue is the flag itself and falsevalue is empty, so an unchecked box adds nothing to the command line; only ultra_fast is checked by default -->
61 <param argument="--out-win-repeats" type="boolean" truevalue="--out-win-repeats" falsevalue="" checked="false" label="Window repeat counts"/>
62 <param argument="--out-gc" type="boolean" truevalue="--out-gc" falsevalue="" checked="false" label="Window GC"/>
63 <param argument="--out-entropy" type="boolean" truevalue="--out-entropy" falsevalue="" checked="false" label="Window Shannon entropy"/>
64 <param argument="--out-matches" type="boolean" truevalue="--out-matches" falsevalue="" checked="false" label="Canonical/NonCanonical Matches"/>
65 <param argument="--out-its" type="boolean" truevalue="--out-its" falsevalue="" checked="false" label="Interstitial telomeres (ITSs)"/>
66 <param argument="--ultra-fast" type="boolean" truevalue="--ultra-fast" falsevalue="" checked="true" label="Ultra-fast mode (terminal regions only)"/>
67 <param argument="--verbose" type="boolean" truevalue="--verbose" falsevalue="" checked="false" label="Verbose output"/>
68 </inputs>
69
70 <outputs>
71 <!-- BASIC OUTFILES: no filter, so these two datasets are declared on every run -->
72 <data name="terminal_telomeres" format="bed" from_work_dir="output/*_terminal_telomeres.bed" label="${tool.name} on ${on_string}: Terminal telomeres"/>
73 <data name="telo_report" format="tabular" from_work_dir="output/*.telo.report" label="${tool.name} on ${on_string}: Summary report"/>
74 <!-- Every from_work_dir is a glob under output/, the directory the command template creates and hands to teloscope as its output location -->
75 <!-- OPTIONAL OUTFILES: each dataset is declared only when its filter expression over the boolean inputs is true; out_matches gates two datasets, and any one of out_gc / out_entropy / out_win_repeats enables the single window_metrics dataset -->
76 <data name="interstitial_telomeres" format="bed" from_work_dir="output/*_interstitial_telomeres.bed" label="${tool.name} on ${on_string}: Interstitial telomeres">
77 <filter>out_its</filter>
78 </data>
79 <data name="canonical_matches" format="bed" from_work_dir="output/*_canonical_matches.bed" label="${tool.name} on ${on_string}: Canonical matches">
80 <filter>out_matches</filter>
81 </data>
82 <data name="noncanonical_matches" format="bed" from_work_dir="output/*_noncanonical_matches.bed" label="${tool.name} on ${on_string}: Noncanonical matches">
83 <filter>out_matches</filter>
84 </data>
85 <data name="window_metrics" format="bedgraph" from_work_dir="output/*_window_metrics.bedgraph" label="${tool.name} on ${on_string}: Window metrics">
86 <filter>out_gc or out_entropy or out_win_repeats</filter>
87 </data>
88 </outputs>
89
90 <tests>
91 <!-- 1) Default (-u): every switch keeps its default, so only the two unfiltered outputs (terminal telomeres + report) are expected -->
92 <test expect_num_outputs="2">
93 <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/>
94 <param name="canonical" value="TTAGGG"/>
95 <param name="patterns" value="TTAGGG,CCCTAA"/>
96 <output name="terminal_telomeres">
97 <assert_contents>
98 <!-- at least one BED-like line (match whole line to allow extra cols) -->
99 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
100 </assert_contents>
101 </output>
102 <output name="telo_report">
103 <assert_contents>
104 <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
105 <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
106 <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
107 <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
108 <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
109 </assert_contents>
110 </output>
111 </test>
112
113 <!-- 2) Genome-wide (-g -e -r -m -i ; disable -u) -->
114 <test expect_num_outputs="6">
115 <param name="input_sequence" value="bTaeGut7_chr33_mat.fa.gz" ftype="fasta.gz"/>
116 <param name="canonical" value="TTAGGG"/>
117 <param name="patterns" value="TTAGGG,CCCTAA"/>
118 <param name="out_gc" value="true"/>
119 <param name="out_entropy" value="true"/>
120 <param name="out_win_repeats" value="true"/>
121 <param name="out_matches" value="true"/>
122 <param name="out_its" value="true"/>
123 <param name="ultra_fast" value="false"/>
124 <!-- Six outputs = the two unfiltered ones + interstitial_telomeres + canonical_matches + noncanonical_matches + the single window_metrics dataset -->
125 <output name="terminal_telomeres">
126 <assert_contents>
127 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
128 </assert_contents>
129 </output>
130 <output name="interstitial_telomeres">
131 <assert_contents>
132 <has_line_matching expression="^\S+\t\d+\t\d+(?:\t.*)?$"/>
133 </assert_contents>
134 </output>
135 <output name="canonical_matches">
136 <assert_contents>
137 <!-- ensure at least one CCCTAA match line -->
138 <has_line_matching expression="^\S+\t\d+\t\d+\tCCCTAA(?:\s|$)"/>
139 </assert_contents>
140 </output>
141 <output name="noncanonical_matches">
142 <assert_contents>
143 <!-- expect an empty file -->
144 <has_n_lines n="0"/>
145 </assert_contents>
146 </output>
147 <output name="window_metrics">
148 <assert_contents>
149 <has_text text="track type=bedGraph"/>
150 <!-- first data window line -->
151 <has_line_matching expression="^\S+\t0\t1000\t.*"/>
152 </assert_contents>
153 </output>
154 <output name="telo_report">
155 <assert_contents>
156 <has_line_matching expression="\+\+\+ Path Summary Report \+\+\+"/>
157 <has_line_matching expression="\+\+\+ Assembly Summary Report \+\+\+"/>
158 <has_line_matching expression="\+\+\+ Telomere Statistics \+\+\+"/>
159 <has_line_matching expression="\+\+\+ Chromosome Telomere Counts\+\+\+"/>
160 <has_line_matching expression="\+\+\+ Chromosome Telomere/Gap Completeness\+\+\+"/>
161 </assert_contents>
162 </output>
163 </test>
164 </tests>
165
166 <help><![CDATA[
167 Description:
168 Teloscope is a tool for telomere annotation in genome assemblies.
169 It scans for user-specified telomeric repeat patterns across assembly paths, contigs and windows.
170 Teloscope annotates terminal and interstitial telomeres, canonical/noncanonical matches and genome-wide metrics such as GC content, Shannon entropy, and repeat counts.
171 It generates a detailed telomere summary report for paths, telomere statistics, and chromosome labels to assess telomere completeness.
172 Teloscope can be used for both complete and fragmented assemblies, providing valuable information for manual genome curation and analysis.
173
174 Usage:
175 Default (ultra-fast) scans terminal regions and reports terminal telomeres + a summary report.
176 * ${input_sequence.name}_terminal_telomeres.bed
177 * ${input_sequence.name}.telo.report
178
179 Enabling window/match options (-g -e -r -m -i) performs a genome-wide scan and produces:
180 * ${input_sequence.name}_terminal_telomeres.bed
181 * ${input_sequence.name}_interstitial_telomeres.bed
182 * ${input_sequence.name}_canonical_matches.bed
183 * ${input_sequence.name}_noncanonical_matches.bed
184 * ${input_sequence.name}_window_metrics.bedgraph
185 * ${input_sequence.name}.telo.report
186
187 Key parameters:
188 - -c / --canonical: Canonical repeat (default TTAGGG). This is the vertebrate telomeric motif found at chromosome ends; it binds the shelterin complex to form a telomere.
189 - -p / --patterns: Variant patterns (comma-separated). These are additional telomeric repeat motifs to search for besides the canonical repeat, including other variants that can be part of telomeres.
190 - -w / -s: window size / step (defaults 1000/500)
191 - -u / --ultra-fast: terminal scan only (default true); disabled automatically when -g/-e/-r/-m/-i are used.
192 ]]></help>
193
194 <expand macro="citations"/>
195 </tool>